diff options
author | unknown <heikki@donna.mysql.fi> | 2001-10-10 22:47:08 +0300 |
---|---|---|
committer | unknown <heikki@donna.mysql.fi> | 2001-10-10 22:47:08 +0300 |
commit | 1904897be71cba7e6f2cf1192ba0cc2e8d907e00 (patch) | |
tree | fc361924d14a3d1727a8b88f61352ed039054720 /innobase | |
parent | 151ffe886b4b21499471658fdf01ea8347287092 (diff) | |
download | mariadb-git-1904897be71cba7e6f2cf1192ba0cc2e8d907e00.tar.gz |
ut0mem.c Merge changes in InnoDB-3.23.43b
ut0ut.c Merge changes in InnoDB-3.23.43b
trx0purge.c Merge changes in InnoDB-3.23.43b
trx0rec.c Merge changes in InnoDB-3.23.43b
trx0trx.c Merge changes in InnoDB-3.23.43b
trx0undo.c Merge changes in InnoDB-3.23.43b
thr0loc.c Merge changes in InnoDB-3.23.43b
sync0arr.c Merge changes in InnoDB-3.23.43b
sync0rw.c Merge changes in InnoDB-3.23.43b
sync0sync.c Merge changes in InnoDB-3.23.43b
srv0srv.c Merge changes in InnoDB-3.23.43b
srv0start.c Merge changes in InnoDB-3.23.43b
row0ins.c Merge changes in InnoDB-3.23.43b
row0mysql.c Merge changes in InnoDB-3.23.43b
row0purge.c Merge changes in InnoDB-3.23.43b
row0sel.c Merge changes in InnoDB-3.23.43b
row0umod.c Merge changes in InnoDB-3.23.43b
row0upd.c Merge changes in InnoDB-3.23.43b
row0vers.c Merge changes in InnoDB-3.23.43b
rem0cmp.c Merge changes in InnoDB-3.23.43b
que0que.c Merge changes in InnoDB-3.23.43b
pars0opt.c Merge changes in InnoDB-3.23.43b
pars0pars.c Merge changes in InnoDB-3.23.43b
lexyy.c Merge changes in InnoDB-3.23.43b
pars0grm.c Merge changes in InnoDB-3.23.43b
page0page.c Merge changes in InnoDB-3.23.43b
os0file.c Merge changes in InnoDB-3.23.43b
mtr0log.c Merge changes in InnoDB-3.23.43b
mem0pool.c Merge changes in InnoDB-3.23.43b
log0log.c Merge changes in InnoDB-3.23.43b
log0recv.c Merge changes in InnoDB-3.23.43b
lock0lock.c Merge changes in InnoDB-3.23.43b
ibuf0ibuf.c Merge changes in InnoDB-3.23.43b
fil0fil.c Merge changes in InnoDB-3.23.43b
dict0crea.c Merge changes in InnoDB-3.23.43b
dict0dict.c Merge changes in InnoDB-3.23.43b
dict0load.c Merge changes in InnoDB-3.23.43b
dict0mem.c Merge changes in InnoDB-3.23.43b
data0data.c Merge changes in InnoDB-3.23.43b
data0type.c Merge changes in InnoDB-3.23.43b
buf0buf.c Merge changes in InnoDB-3.23.43b
buf0lru.c Merge changes in InnoDB-3.23.43b
btr0btr.c Merge changes in InnoDB-3.23.43b
btr0cur.c Merge changes in InnoDB-3.23.43b
btr0pcur.c Merge changes in InnoDB-3.23.43b
btr0sea.c Merge changes in InnoDB-3.23.43b
data0type.ic Merge changes in InnoDB-3.23.43b
dict0dict.ic Merge changes in InnoDB-3.23.43b
mtr0mtr.ic Merge changes in InnoDB-3.23.43b
row0upd.ic Merge changes in InnoDB-3.23.43b
sync0ipm.ic Merge changes in InnoDB-3.23.43b
sync0rw.ic Merge changes in InnoDB-3.23.43b
sync0sync.ic Merge changes in InnoDB-3.23.43b
trx0rseg.ic Merge changes in InnoDB-3.23.43b
btr0pcur.ic Merge changes in InnoDB-3.23.43b
buf0buf.ic Merge changes in InnoDB-3.23.43b
data0data.ic Merge changes in InnoDB-3.23.43b
row0upd.h Merge changes in InnoDB-3.23.43b
srv0srv.h Merge changes in InnoDB-3.23.43b
sync0arr.h Merge changes in InnoDB-3.23.43b
sync0rw.h Merge changes in InnoDB-3.23.43b
sync0sync.h Merge changes in InnoDB-3.23.43b
trx0trx.h Merge changes in InnoDB-3.23.43b
ut0mem.h Merge changes in InnoDB-3.23.43b
data0data.h Merge changes in InnoDB-3.23.43b
data0type.h Merge changes in InnoDB-3.23.43b
db0err.h Merge changes in InnoDB-3.23.43b
dict0crea.h Merge changes in InnoDB-3.23.43b
dict0dict.h Merge changes in InnoDB-3.23.43b
dict0load.h Merge changes in InnoDB-3.23.43b
dict0mem.h Merge changes in InnoDB-3.23.43b
dict0types.h Merge changes in InnoDB-3.23.43b
fil0fil.h Merge changes in InnoDB-3.23.43b
ibuf0ibuf.h Merge changes in InnoDB-3.23.43b
lock0lock.h Merge changes in InnoDB-3.23.43b
log0log.h Merge changes in InnoDB-3.23.43b
mtr0mtr.h Merge changes in InnoDB-3.23.43b
rem0cmp.h Merge changes in InnoDB-3.23.43b
row0ins.h Merge changes in InnoDB-3.23.43b
row0mysql.h Merge changes in InnoDB-3.23.43b
btr0cur.h Merge changes in InnoDB-3.23.43b
btr0pcur.h Merge changes in InnoDB-3.23.43b
btr0sea.h Merge changes in InnoDB-3.23.43b
buf0buf.h Merge changes in InnoDB-3.23.43b
sql_table.cc Merge changes in InnoDB-3.23.43b
sql_db.cc Merge changes in InnoDB-3.23.43b
ha_innobase.cc Merge changes in InnoDB-3.23.43b
handler.cc Merge changes in InnoDB-3.23.43b
ha_innobase.h Merge changes in InnoDB-3.23.43b
handler.h Merge changes in InnoDB-3.23.43b
sql/ha_innobase.h:
Merge changes in InnoDB-3.23.43b
sql/handler.h:
Merge changes in InnoDB-3.23.43b
sql/ha_innobase.cc:
Merge changes in InnoDB-3.23.43b
sql/handler.cc:
Merge changes in InnoDB-3.23.43b
sql/sql_db.cc:
Merge changes in InnoDB-3.23.43b
sql/sql_table.cc:
Merge changes in InnoDB-3.23.43b
innobase/include/btr0cur.h:
Merge changes in InnoDB-3.23.43b
innobase/include/btr0pcur.h:
Merge changes in InnoDB-3.23.43b
innobase/include/btr0sea.h:
Merge changes in InnoDB-3.23.43b
innobase/include/buf0buf.h:
Merge changes in InnoDB-3.23.43b
innobase/include/data0data.h:
Merge changes in InnoDB-3.23.43b
innobase/include/data0type.h:
Merge changes in InnoDB-3.23.43b
innobase/include/db0err.h:
Merge changes in InnoDB-3.23.43b
innobase/include/dict0crea.h:
Merge changes in InnoDB-3.23.43b
innobase/include/dict0dict.h:
Merge changes in InnoDB-3.23.43b
innobase/include/dict0load.h:
Merge changes in InnoDB-3.23.43b
innobase/include/dict0mem.h:
Merge changes in InnoDB-3.23.43b
innobase/include/dict0types.h:
Merge changes in InnoDB-3.23.43b
innobase/include/fil0fil.h:
Merge changes in InnoDB-3.23.43b
innobase/include/ibuf0ibuf.h:
Merge changes in InnoDB-3.23.43b
innobase/include/lock0lock.h:
Merge changes in InnoDB-3.23.43b
innobase/include/log0log.h:
Merge changes in InnoDB-3.23.43b
innobase/include/mtr0mtr.h:
Merge changes in InnoDB-3.23.43b
innobase/include/rem0cmp.h:
Merge changes in InnoDB-3.23.43b
innobase/include/row0ins.h:
Merge changes in InnoDB-3.23.43b
innobase/include/row0mysql.h:
Merge changes in InnoDB-3.23.43b
innobase/include/row0upd.h:
Merge changes in InnoDB-3.23.43b
innobase/include/srv0srv.h:
Merge changes in InnoDB-3.23.43b
innobase/include/sync0arr.h:
Merge changes in InnoDB-3.23.43b
innobase/include/sync0rw.h:
Merge changes in InnoDB-3.23.43b
innobase/include/sync0sync.h:
Merge changes in InnoDB-3.23.43b
innobase/include/trx0trx.h:
Merge changes in InnoDB-3.23.43b
innobase/include/ut0mem.h:
Merge changes in InnoDB-3.23.43b
innobase/include/btr0pcur.ic:
Merge changes in InnoDB-3.23.43b
innobase/include/buf0buf.ic:
Merge changes in InnoDB-3.23.43b
innobase/include/data0data.ic:
Merge changes in InnoDB-3.23.43b
innobase/include/data0type.ic:
Merge changes in InnoDB-3.23.43b
innobase/include/dict0dict.ic:
Merge changes in InnoDB-3.23.43b
innobase/include/mtr0mtr.ic:
Merge changes in InnoDB-3.23.43b
innobase/include/row0upd.ic:
Merge changes in InnoDB-3.23.43b
innobase/include/sync0ipm.ic:
Merge changes in InnoDB-3.23.43b
innobase/include/sync0rw.ic:
Merge changes in InnoDB-3.23.43b
innobase/include/sync0sync.ic:
Merge changes in InnoDB-3.23.43b
innobase/include/trx0rseg.ic:
Merge changes in InnoDB-3.23.43b
innobase/btr/btr0btr.c:
Merge changes in InnoDB-3.23.43b
innobase/btr/btr0cur.c:
Merge changes in InnoDB-3.23.43b
innobase/btr/btr0pcur.c:
Merge changes in InnoDB-3.23.43b
innobase/btr/btr0sea.c:
Merge changes in InnoDB-3.23.43b
innobase/buf/buf0buf.c:
Merge changes in InnoDB-3.23.43b
innobase/buf/buf0lru.c:
Merge changes in InnoDB-3.23.43b
innobase/data/data0data.c:
Merge changes in InnoDB-3.23.43b
innobase/data/data0type.c:
Merge changes in InnoDB-3.23.43b
innobase/dict/dict0crea.c:
Merge changes in InnoDB-3.23.43b
innobase/dict/dict0dict.c:
Merge changes in InnoDB-3.23.43b
innobase/dict/dict0load.c:
Merge changes in InnoDB-3.23.43b
innobase/dict/dict0mem.c:
Merge changes in InnoDB-3.23.43b
innobase/fil/fil0fil.c:
Merge changes in InnoDB-3.23.43b
innobase/ibuf/ibuf0ibuf.c:
Merge changes in InnoDB-3.23.43b
innobase/lock/lock0lock.c:
Merge changes in InnoDB-3.23.43b
innobase/log/log0log.c:
Merge changes in InnoDB-3.23.43b
innobase/log/log0recv.c:
Merge changes in InnoDB-3.23.43b
innobase/mem/mem0pool.c:
Merge changes in InnoDB-3.23.43b
innobase/mtr/mtr0log.c:
Merge changes in InnoDB-3.23.43b
innobase/os/os0file.c:
Merge changes in InnoDB-3.23.43b
innobase/page/page0page.c:
Merge changes in InnoDB-3.23.43b
innobase/pars/lexyy.c:
Merge changes in InnoDB-3.23.43b
innobase/pars/pars0grm.c:
Merge changes in InnoDB-3.23.43b
innobase/pars/pars0opt.c:
Merge changes in InnoDB-3.23.43b
innobase/pars/pars0pars.c:
Merge changes in InnoDB-3.23.43b
innobase/que/que0que.c:
Merge changes in InnoDB-3.23.43b
innobase/rem/rem0cmp.c:
Merge changes in InnoDB-3.23.43b
innobase/row/row0ins.c:
Merge changes in InnoDB-3.23.43b
innobase/row/row0mysql.c:
Merge changes in InnoDB-3.23.43b
innobase/row/row0purge.c:
Merge changes in InnoDB-3.23.43b
innobase/row/row0sel.c:
Merge changes in InnoDB-3.23.43b
innobase/row/row0umod.c:
Merge changes in InnoDB-3.23.43b
innobase/row/row0upd.c:
Merge changes in InnoDB-3.23.43b
innobase/row/row0vers.c:
Merge changes in InnoDB-3.23.43b
innobase/srv/srv0srv.c:
Merge changes in InnoDB-3.23.43b
innobase/srv/srv0start.c:
Merge changes in InnoDB-3.23.43b
innobase/sync/sync0arr.c:
Merge changes in InnoDB-3.23.43b
innobase/sync/sync0rw.c:
Merge changes in InnoDB-3.23.43b
innobase/sync/sync0sync.c:
Merge changes in InnoDB-3.23.43b
innobase/thr/thr0loc.c:
Merge changes in InnoDB-3.23.43b
innobase/trx/trx0purge.c:
Merge changes in InnoDB-3.23.43b
innobase/trx/trx0rec.c:
Merge changes in InnoDB-3.23.43b
innobase/trx/trx0trx.c:
Merge changes in InnoDB-3.23.43b
innobase/trx/trx0undo.c:
Merge changes in InnoDB-3.23.43b
innobase/ut/ut0mem.c:
Merge changes in InnoDB-3.23.43b
innobase/ut/ut0ut.c:
Merge changes in InnoDB-3.23.43b
BitKeeper/etc/logging_ok:
Logging to logging@openlogging.org accepted
Diffstat (limited to 'innobase')
84 files changed, 5047 insertions, 1190 deletions
diff --git a/innobase/btr/btr0btr.c b/innobase/btr/btr0btr.c index 6da323867fb..e4e957ea7b6 100644 --- a/innobase/btr/btr0btr.c +++ b/innobase/btr/btr0btr.c @@ -2347,6 +2347,8 @@ btr_validate_level( mtr_start(&mtr); + mtr_x_lock(dict_tree_get_lock(tree), &mtr); + page = btr_root_get(tree, &mtr); space = buf_frame_get_space_id(page); diff --git a/innobase/btr/btr0cur.c b/innobase/btr/btr0cur.c index 7783f618d6d..a64ed8b6fe1 100644 --- a/innobase/btr/btr0cur.c +++ b/innobase/btr/btr0cur.c @@ -256,7 +256,8 @@ btr_cur_search_to_nth_level( #ifdef UNIV_SEARCH_PERF_STAT info->n_searches++; #endif - if (latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ + if (btr_search_latch.writer != RW_LOCK_NOT_LOCKED + && latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ && !estimate && btr_search_guess_on_hash(index, info, tuple, mode, latch_mode, cursor, @@ -344,9 +345,7 @@ btr_cur_search_to_nth_level( retry_page_get: page = buf_page_get_gen(space, page_no, rw_latch, guess, buf_mode, -#ifdef UNIV_SYNC_DEBUG IB__FILE__, __LINE__, -#endif mtr); if (page == NULL) { @@ -380,7 +379,7 @@ retry_page_get: } #endif ut_ad(0 == ut_dulint_cmp(tree->id, - btr_page_get_index_id(page))); + btr_page_get_index_id(page))); if (height == ULINT_UNDEFINED) { /* We are in the root node */ @@ -515,9 +514,7 @@ btr_cur_open_at_index_side( for (;;) { page = buf_page_get_gen(space, page_no, RW_NO_LATCH, NULL, BUF_GET, -#ifdef UNIV_SYNC_DEBUG IB__FILE__, __LINE__, -#endif mtr); ut_ad(0 == ut_dulint_cmp(tree->id, btr_page_get_index_id(page))); @@ -604,9 +601,7 @@ btr_cur_open_at_rnd_pos( for (;;) { page = buf_page_get_gen(space, page_no, RW_NO_LATCH, NULL, BUF_GET, -#ifdef UNIV_SYNC_DEBUG IB__FILE__, __LINE__, -#endif mtr); ut_ad(0 == ut_dulint_cmp(tree->id, btr_page_get_index_id(page))); @@ -1223,6 +1218,57 @@ btr_cur_parse_update_in_place( } /***************************************************************** +Updates a secondary index record when the update causes no size +changes in its fields. 
The only case when this function is currently +called is that in a char field characters change to others which +are identified in the collation order. */ + +ulint +btr_cur_update_sec_rec_in_place( +/*============================*/ + /* out: DB_SUCCESS or error number */ + btr_cur_t* cursor, /* in: cursor on the record to update; + cursor stays valid and positioned on the + same record */ + upd_t* update, /* in: update vector */ + que_thr_t* thr, /* in: query thread */ + mtr_t* mtr) /* in: mtr */ +{ + dict_index_t* index = cursor->index; + dict_index_t* clust_index; + ulint err; + rec_t* rec; + dulint roll_ptr = ut_dulint_zero; + trx_t* trx = thr_get_trx(thr); + + /* Only secondary index records are updated using this function */ + ut_ad(0 == (index->type & DICT_CLUSTERED)); + + rec = btr_cur_get_rec(cursor); + + err = lock_sec_rec_modify_check_and_lock(0, rec, index, thr); + + if (err != DB_SUCCESS) { + + return(err); + } + + /* Remove possible hash index pointer to this record */ + btr_search_update_hash_on_delete(cursor); + + row_upd_rec_in_place(rec, update); + + clust_index = dict_table_get_first_index(index->table); + + /* Note that roll_ptr is really just a dummy value since + a secondary index record does not contain any sys columns */ + + btr_cur_update_in_place_log(BTR_KEEP_SYS_FLAG, rec, clust_index, + update, trx, roll_ptr, mtr); + return(DB_SUCCESS); +} + +/***************************************************************** Updates a record when the update causes no size changes in its fields. 
*/ ulint @@ -1248,7 +1294,7 @@ btr_cur_update_in_place( ibool was_delete_marked; /* Only clustered index records are updated using this function */ - ut_ad((cursor->index)->type & DICT_CLUSTERED); + ut_ad(cursor->index->type & DICT_CLUSTERED); rec = btr_cur_get_rec(cursor); index = cursor->index; @@ -2477,27 +2523,33 @@ btr_estimate_n_rows_in_range( } /*********************************************************************** -Estimates the number of different key values in a given index. */ +Estimates the number of different key values in a given index, for +each n-column prefix of the index where n <= dict_index_get_n_unique(index). +The estimates are stored in the array index->stat_n_diff_key_vals. */ -ulint +void btr_estimate_number_of_different_key_vals( /*======================================*/ - /* out: estimated number of key values */ dict_index_t* index) /* in: index */ { btr_cur_t cursor; page_t* page; rec_t* rec; - ulint total_n_recs = 0; - ulint n_diff_in_page; - ulint n_diff = 0; + ulint n_cols; ulint matched_fields; ulint matched_bytes; + ulint* n_diff; + ulint not_empty_flag = 0; ulint i; + ulint j; mtr_t mtr; - if (index->type & DICT_UNIQUE) { - return(index->table->stat_n_rows); + n_cols = dict_index_get_n_unique(index); + + n_diff = mem_alloc((n_cols + 1) * sizeof(ib_longlong)); + + for (j = 0; j <= n_cols; j++) { + n_diff[j] = 0; } /* We sample some pages in the index to get an estimate */ @@ -2507,17 +2559,19 @@ btr_estimate_number_of_different_key_vals( btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr); - /* Count the number of different key values minus one on this - index page: we subtract one because otherwise our algorithm - would give a wrong estimate for an index where there is - just one key value */ + /* Count the number of different key values minus one + for each prefix of the key on this index page: we subtract + one because otherwise our algorithm would give a wrong + estimate for an index where there is just one key 
value */ page = btr_cur_get_page(&cursor); rec = page_get_infimum_rec(page); rec = page_rec_get_next(rec); - n_diff_in_page = 0; + if (rec != page_get_supremum_rec(page)) { + not_empty_flag = 1; + } while (rec != page_get_supremum_rec(page) && page_rec_get_next(rec) @@ -2528,30 +2582,30 @@ btr_estimate_number_of_different_key_vals( cmp_rec_rec_with_match(rec, page_rec_get_next(rec), index, &matched_fields, &matched_bytes); - if (matched_fields < - dict_index_get_n_ordering_defined_by_user( - index)) { - n_diff_in_page++; - } + for (j = matched_fields + 1; j <= n_cols; j++) { + n_diff[j]++; + } + rec = page_rec_get_next(rec); } - - n_diff += n_diff_in_page; - - total_n_recs += page_get_n_recs(page); mtr_commit(&mtr); } - if (n_diff == 0) { - /* We play safe and assume that there are just two different - key values in the index */ - - return(2); + /* If we saw k borders between different key values on + BTR_KEY_VAL_ESTIMATE_N_PAGES leaf pages, we can estimate how many + there will be in index->stat_n_leaf_pages */ + + for (j = 0; j <= n_cols; j++) { + index->stat_n_diff_key_vals[j] = + (n_diff[j] * index->stat_n_leaf_pages + + BTR_KEY_VAL_ESTIMATE_N_PAGES - 1 + + not_empty_flag) + / BTR_KEY_VAL_ESTIMATE_N_PAGES; } - - return(index->table->stat_n_rows / (total_n_recs / n_diff)); + + mem_free(n_diff); } /*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/ diff --git a/innobase/btr/btr0pcur.c b/innobase/btr/btr0pcur.c index 5e625553929..8ca3d41f7f9 100644 --- a/innobase/btr/btr0pcur.c +++ b/innobase/btr/btr0pcur.c @@ -62,8 +62,10 @@ btr_pcur_free_for_mysql( /****************************************************************** The position of the cursor is stored by taking an initial segment of the record the cursor is positioned on, before, or after, and copying it to the -cursor data structure. NOTE that the page where the cursor is positioned -must not be empty! 
*/ +cursor data structure, or just setting a flag if the cursor is before the +first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the +page where the cursor is positioned must not be empty if the index tree is +not totally empty! */ void btr_pcur_store_position( @@ -93,9 +95,21 @@ btr_pcur_store_position( ut_a(cursor->latch_mode != BTR_NO_LATCHES); if (page_get_n_recs(page) == 0) { + /* It must be an empty index tree */ - /* Cannot store position! */ - btr_pcur_close(cursor); + ut_a(btr_page_get_next(page, mtr) == FIL_NULL + && btr_page_get_prev(page, mtr) == FIL_NULL); + + if (rec == page_get_supremum_rec(page)) { + + cursor->rel_pos = BTR_PCUR_AFTER_LAST_IN_TREE; + cursor->old_stored = BTR_PCUR_OLD_STORED; + + return; + } + + cursor->rel_pos = BTR_PCUR_BEFORE_FIRST_IN_TREE; + cursor->old_stored = BTR_PCUR_OLD_STORED; return; } @@ -140,13 +154,15 @@ btr_pcur_copy_stored_position( ut_memcpy((byte*)pcur_receive, (byte*)pcur_donate, sizeof(btr_pcur_t)); - pcur_receive->old_rec_buf = mem_alloc(pcur_donate->buf_size); + if (pcur_donate->old_rec_buf) { + + pcur_receive->old_rec_buf = mem_alloc(pcur_donate->buf_size); - ut_memcpy(pcur_receive->old_rec_buf, pcur_donate->old_rec_buf, + ut_memcpy(pcur_receive->old_rec_buf, pcur_donate->old_rec_buf, pcur_donate->buf_size); - pcur_receive->old_rec = pcur_receive->old_rec_buf + pcur_receive->old_rec = pcur_receive->old_rec_buf + (pcur_donate->old_rec - pcur_donate->old_rec_buf); - + } } /****************************************************************** @@ -158,7 +174,9 @@ to the last record LESS OR EQUAL to the stored record; the last record LESS than the user record which was the successor of the page infimum; (3) cursor was positioned on the page supremum: restores to the first record -GREATER than the user record which was the predecessor of the supremum. 
+(4) cursor was positioned before the first or after the last in an empty tree: +restores to before first or after the last in the tree. */ ibool btr_pcur_restore_position( @@ -177,17 +195,33 @@ btr_pcur_restore_position( dtuple_t* tuple; ulint mode; ulint old_mode; + ibool from_left; mem_heap_t* heap; - ut_a((cursor->pos_state == BTR_PCUR_WAS_POSITIONED) - || (cursor->pos_state == BTR_PCUR_IS_POSITIONED)); + ut_a(cursor->pos_state == BTR_PCUR_WAS_POSITIONED + || cursor->pos_state == BTR_PCUR_IS_POSITIONED); ut_a(cursor->old_stored == BTR_PCUR_OLD_STORED); + + if (cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE + || cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE) { + + if (cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE) { + from_left = TRUE; + } else { + from_left = FALSE; + } + + btr_cur_open_at_index_side(from_left, + btr_pcur_get_btr_cur(cursor)->index, latch_mode, + btr_pcur_get_btr_cur(cursor), mtr); + return(FALSE); + } + ut_a(cursor->old_rec); page = btr_cur_get_page(btr_pcur_get_btr_cur(cursor)); - if ((latch_mode == BTR_SEARCH_LEAF) - || (latch_mode == BTR_MODIFY_LEAF)) { + if (latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF) { /* Try optimistic restoration */ if (buf_page_optimistic_get(latch_mode, page, @@ -242,16 +276,15 @@ btr_pcur_restore_position( /* Restore the old search mode */ cursor->search_mode = old_mode; - if ((cursor->rel_pos == BTR_PCUR_ON) - && btr_pcur_is_on_user_rec(cursor, mtr) - && (0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor)))) { + if (cursor->rel_pos == BTR_PCUR_ON + && btr_pcur_is_on_user_rec(cursor, mtr) + && 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor))) { /* We have to store the NEW value for the modify clock, since the cursor can now be on a different page! 
*/ cursor->modify_clock = buf_frame_get_modify_clock( - buf_frame_align( - btr_pcur_get_rec(cursor))); + buf_frame_align(btr_pcur_get_rec(cursor))); mem_heap_free(heap); return(TRUE); @@ -366,6 +399,7 @@ btr_pcur_move_backward_from_page( latch_mode2 = BTR_MODIFY_PREV; } else { + latch_mode2 = 0; /* To eliminate compiler warning */ ut_error; } diff --git a/innobase/btr/btr0sea.c b/innobase/btr/btr0sea.c index 616f8911aba..31ef8ce573b 100644 --- a/innobase/btr/btr0sea.c +++ b/innobase/btr/btr0sea.c @@ -680,9 +680,7 @@ btr_search_guess_on_hash( success = buf_page_get_known_nowait(latch_mode, page, BUF_MAKE_YOUNG, -#ifdef UNIV_SYNC_DEBUG IB__FILE__, __LINE__, -#endif mtr); rw_lock_s_unlock(&btr_search_latch); diff --git a/innobase/buf/buf0buf.c b/innobase/buf/buf0buf.c index acf56ac4ddf..7d9cbf24948 100644 --- a/innobase/buf/buf0buf.c +++ b/innobase/buf/buf0buf.c @@ -34,6 +34,8 @@ Created 11/5/1995 Heikki Tuuri #include "ibuf0ibuf.h" #include "dict0dict.h" #include "log0recv.h" +#include "trx0undo.h" +#include "srv0srv.h" /* IMPLEMENTATION OF THE BUFFER POOL @@ -240,6 +242,11 @@ buf_page_is_corrupted( checksum = buf_calc_page_checksum(read_buf); + /* Note that InnoDB initializes empty pages to zero, and + early versions of InnoDB did not store page checksum to + the 4 most significant bytes of the page lsn field at the + end of a page: */ + if ((mach_read_from_4(read_buf + FIL_PAGE_LSN + 4) != mach_read_from_4(read_buf + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN + 4)) @@ -257,6 +264,71 @@ buf_page_is_corrupted( } /************************************************************************ +Prints a page to stderr. 
*/ + +void +buf_page_print( +/*===========*/ + byte* read_buf) /* in: a database page */ +{ + dict_index_t* index; + ulint checksum; + char* buf; + + buf = mem_alloc(4 * UNIV_PAGE_SIZE); + + ut_sprintf_buf(buf, read_buf, UNIV_PAGE_SIZE); + + fprintf(stderr, + "InnoDB: Page dump in ascii and hex (%lu bytes):\n%s", + UNIV_PAGE_SIZE, buf); + fprintf(stderr, "InnoDB: End of page dump\n"); + + mem_free(buf); + + checksum = buf_calc_page_checksum(read_buf); + + fprintf(stderr, "InnoDB: Page checksum %lu stored checksum %lu\n", + checksum, mach_read_from_4(read_buf + + UNIV_PAGE_SIZE + - FIL_PAGE_END_LSN)); + fprintf(stderr, + "InnoDB: Page lsn %lu %lu, low 4 bytes of lsn at page end %lu\n", + mach_read_from_4(read_buf + FIL_PAGE_LSN), + mach_read_from_4(read_buf + FIL_PAGE_LSN + 4), + mach_read_from_4(read_buf + UNIV_PAGE_SIZE + - FIL_PAGE_END_LSN + 4)); + if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE) + == TRX_UNDO_INSERT) { + fprintf(stderr, + "InnoDB: Page may be an insert undo log page\n"); + } else if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + + TRX_UNDO_PAGE_TYPE) + == TRX_UNDO_UPDATE) { + fprintf(stderr, + "InnoDB: Page may be an update undo log page\n"); + } + + if (fil_page_get_type(read_buf) == FIL_PAGE_INDEX) { + fprintf(stderr, + "InnoDB: Page may be an index page "); + + fprintf(stderr, + "where index id is %lu %lu\n", + ut_dulint_get_high(btr_page_get_index_id(read_buf)), + ut_dulint_get_low(btr_page_get_index_id(read_buf))); + + index = dict_index_find_on_id_low( + btr_page_get_index_id(read_buf)); + if (index) { + fprintf(stderr, "InnoDB: and table %s index %s\n", + index->table_name, + index->name); + } + } +} + +/************************************************************************ Initializes a buffer control block when the buf_pool is created. 
*/ static void @@ -334,6 +406,8 @@ buf_pool_create( frame = ut_align(buf_pool->frame_mem, UNIV_PAGE_SIZE); buf_pool->frame_zero = frame; + buf_pool->high_end = frame + UNIV_PAGE_SIZE * curr_size; + /* Init block structs and assign frames for them */ for (i = 0; i < max_size; i++) { @@ -345,6 +419,9 @@ buf_pool_create( buf_pool->page_hash = hash_create(2 * max_size); buf_pool->n_pend_reads = 0; + + buf_pool->last_printout_time = time(NULL); + buf_pool->n_pages_read = 0; buf_pool->n_pages_written = 0; buf_pool->n_pages_created = 0; @@ -352,6 +429,8 @@ buf_pool_create( buf_pool->n_page_gets = 0; buf_pool->n_page_gets_old = 0; buf_pool->n_pages_read_old = 0; + buf_pool->n_pages_written_old = 0; + buf_pool->n_pages_created_old = 0; /* 2. Initialize flushing fields ---------------------------- */ @@ -379,6 +458,10 @@ buf_pool_create( for (i = 0; i < curr_size; i++) { block = buf_pool_get_nth_block(buf_pool, i); + + /* Wipe contents of page to eliminate a Purify warning */ + memset(block->frame, '\0', UNIV_PAGE_SIZE); + UT_LIST_ADD_FIRST(free, buf_pool->free, block); } @@ -650,10 +733,8 @@ buf_page_get_gen( buf_frame_t* guess, /* in: guessed frame or NULL */ ulint mode, /* in: BUF_GET, BUF_GET_IF_IN_POOL, BUF_GET_NO_LATCH, BUF_GET_NOWAIT */ -#ifdef UNIV_SYNC_DEBUG char* file, /* in: file name */ ulint line, /* in: line where called */ -#endif mtr_t* mtr) /* in: mini-transaction */ { buf_block_t* block; @@ -759,19 +840,13 @@ loop: if (mode == BUF_GET_NOWAIT) { if (rw_latch == RW_S_LATCH) { - success = rw_lock_s_lock_func_nowait(&(block->lock) - #ifdef UNIV_SYNC_DEBUG - ,file, line - #endif - ); + success = rw_lock_s_lock_func_nowait(&(block->lock), + file, line); fix_type = MTR_MEMO_PAGE_S_FIX; } else { ut_ad(rw_latch == RW_X_LATCH); - success = rw_lock_x_lock_func_nowait(&(block->lock) - #ifdef UNIV_SYNC_DEBUG - ,file, line - #endif - ); + success = rw_lock_x_lock_func_nowait(&(block->lock), + file, line); fix_type = MTR_MEMO_PAGE_X_FIX; } @@ -796,18 +871,12 @@ loop: 
fix_type = MTR_MEMO_BUF_FIX; } else if (rw_latch == RW_S_LATCH) { - rw_lock_s_lock_func(&(block->lock) - #ifdef UNIV_SYNC_DEBUG - ,0, file, line - #endif - ); + rw_lock_s_lock_func(&(block->lock), 0, file, line); + fix_type = MTR_MEMO_PAGE_S_FIX; } else { - rw_lock_x_lock_func(&(block->lock), 0 - #ifdef UNIV_SYNC_DEBUG - , file, line - #endif - ); + rw_lock_x_lock_func(&(block->lock), 0, file, line); + fix_type = MTR_MEMO_PAGE_X_FIX; } @@ -838,10 +907,8 @@ buf_page_optimistic_get_func( buf_frame_t* guess, /* in: guessed frame */ dulint modify_clock,/* in: modify clock value if mode is ..._GUESS_ON_CLOCK */ -#ifdef UNIV_SYNC_DEBUG char* file, /* in: file name */ ulint line, /* in: line where called */ -#endif mtr_t* mtr) /* in: mini-transaction */ { buf_block_t* block; @@ -883,18 +950,12 @@ buf_page_optimistic_get_func( ut_ad(!ibuf_inside() || ibuf_page(block->space, block->offset)); if (rw_latch == RW_S_LATCH) { - success = rw_lock_s_lock_func_nowait(&(block->lock) - #ifdef UNIV_SYNC_DEBUG - , file, line - #endif - ); + success = rw_lock_s_lock_func_nowait(&(block->lock), + file, line); fix_type = MTR_MEMO_PAGE_S_FIX; } else { - success = rw_lock_x_lock_func_nowait(&(block->lock) - #ifdef UNIV_SYNC_DEBUG - , file, line - #endif - ); + success = rw_lock_x_lock_func_nowait(&(block->lock), + file, line); fix_type = MTR_MEMO_PAGE_X_FIX; } @@ -971,10 +1032,8 @@ buf_page_get_known_nowait( ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */ buf_frame_t* guess, /* in: the known page frame */ ulint mode, /* in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */ -#ifdef UNIV_SYNC_DEBUG char* file, /* in: file name */ ulint line, /* in: line where called */ -#endif mtr_t* mtr) /* in: mini-transaction */ { buf_block_t* block; @@ -1017,18 +1076,12 @@ buf_page_get_known_nowait( ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD)); if (rw_latch == RW_S_LATCH) { - success = rw_lock_s_lock_func_nowait(&(block->lock) - #ifdef UNIV_SYNC_DEBUG - , file, line - #endif - ); + success = 
rw_lock_s_lock_func_nowait(&(block->lock), + file, line); fix_type = MTR_MEMO_PAGE_S_FIX; } else { - success = rw_lock_x_lock_func_nowait(&(block->lock) - #ifdef UNIV_SYNC_DEBUG - , file, line - #endif - ); + success = rw_lock_x_lock_func_nowait(&(block->lock), + file, line); fix_type = MTR_MEMO_PAGE_X_FIX; } @@ -1318,9 +1371,26 @@ buf_page_io_complete( fprintf(stderr, "InnoDB: Database page corruption or a failed\n" "InnoDB: file read of page %lu.\n", block->offset); + fprintf(stderr, "InnoDB: You may have to recover from a backup.\n"); - exit(1); + + buf_page_print(block->frame); + + fprintf(stderr, + "InnoDB: Database page corruption or a failed\n" + "InnoDB: file read of page %lu.\n", block->offset); + fprintf(stderr, + "InnoDB: You may have to recover from a backup.\n"); + fprintf(stderr, + "InnoDB: It is also possible that your operating\n" + "InnoDB: system has corrupted its own file cache\n" + "InnoDB: and rebooting your computer removes the\n" + "InnoDB: error.\n"); + + if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) { + exit(1); + } } if (recv_recovery_is_on()) { @@ -1623,12 +1693,27 @@ buf_print(void) } /************************************************************************* +Returns the number of pending buf pool ios. */ + +ulint +buf_get_n_pending_ios(void) +/*=======================*/ +{ + return(buf_pool->n_pend_reads + + buf_pool->n_flush[BUF_FLUSH_LRU] + + buf_pool->n_flush[BUF_FLUSH_LIST] + + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]); +} + +/************************************************************************* Prints info of the buffer i/o. 
*/ void buf_print_io(void) /*==============*/ { + time_t current_time; + double time_elapsed; ulint size; ut_ad(buf_pool); @@ -1637,11 +1722,11 @@ buf_print_io(void) mutex_enter(&(buf_pool->mutex)); - printf("LRU list length %lu \n", UT_LIST_GET_LEN(buf_pool->LRU)); - printf("Free list length %lu \n", UT_LIST_GET_LEN(buf_pool->free)); + printf("Free list length %lu \n", UT_LIST_GET_LEN(buf_pool->free)); + printf("LRU list length %lu \n", UT_LIST_GET_LEN(buf_pool->LRU)); printf("Flush list length %lu \n", UT_LIST_GET_LEN(buf_pool->flush_list)); - printf("Buffer pool size in pages %lu\n", size); + printf("Buffer pool size %lu\n", size); printf("Pending reads %lu \n", buf_pool->n_pend_reads); @@ -1650,9 +1735,21 @@ buf_print_io(void) buf_pool->n_flush[BUF_FLUSH_LIST], buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]); + current_time = time(NULL); + time_elapsed = difftime(current_time, buf_pool->last_printout_time); + + buf_pool->last_printout_time = current_time; + printf("Pages read %lu, created %lu, written %lu\n", buf_pool->n_pages_read, buf_pool->n_pages_created, buf_pool->n_pages_written); + printf("%.2f reads/s, %.2f creates/s, %.2f writes/s\n", + (buf_pool->n_pages_read - buf_pool->n_pages_read_old) + / time_elapsed, + (buf_pool->n_pages_created - buf_pool->n_pages_created_old) + / time_elapsed, + (buf_pool->n_pages_written - buf_pool->n_pages_written_old) + / time_elapsed); if (buf_pool->n_page_gets > buf_pool->n_page_gets_old) { printf("Buffer pool hit rate %lu / 1000\n", @@ -1660,10 +1757,14 @@ buf_print_io(void) - ((1000 * (buf_pool->n_pages_read - buf_pool->n_pages_read_old)) / (buf_pool->n_page_gets - buf_pool->n_page_gets_old))); + } else { + printf("No buffer pool activity since the last printout\n"); } buf_pool->n_page_gets_old = buf_pool->n_page_gets; buf_pool->n_pages_read_old = buf_pool->n_pages_read; + buf_pool->n_pages_created_old = buf_pool->n_pages_created; + buf_pool->n_pages_written_old = buf_pool->n_pages_written; mutex_exit(&(buf_pool->mutex)); } 
diff --git a/innobase/buf/buf0lru.c b/innobase/buf/buf0lru.c index 142beaaaa15..eb63fa99f4a 100644 --- a/innobase/buf/buf0lru.c +++ b/innobase/buf/buf0lru.c @@ -551,6 +551,10 @@ buf_LRU_block_free_non_file_page( block->state = BUF_BLOCK_NOT_USED; +#ifdef UNIV_DEBUG + /* Wipe contents of page to reveal possible stale pointers to it */ + memset(block->frame, '\0', UNIV_PAGE_SIZE); +#endif UT_LIST_ADD_FIRST(free, buf_pool->free, block); } diff --git a/innobase/data/data0data.c b/innobase/data/data0data.c index aecc56ec022..2254dcb6ae6 100644 --- a/innobase/data/data0data.c +++ b/innobase/data/data0data.c @@ -14,6 +14,7 @@ Created 5/30/1994 Heikki Tuuri #include "ut0rnd.h" #include "rem0rec.h" +#include "rem0cmp.h" #include "page0page.h" #include "dict0dict.h" #include "btr0cur.h" @@ -63,6 +64,53 @@ dtuple_get_nth_field_noninline( return(dtuple_get_nth_field(tuple, n)); } +/**************************************************************** +Returns TRUE if lengths of two dtuples are equal and respective data fields +in them are equal when compared with collation in char fields (not as binary +strings). */ + +ibool +dtuple_datas_are_ordering_equal( +/*============================*/ + /* out: TRUE if length and fields are equal + when compared with cmp_data_data: + NOTE: in character type fields some letters + are identified with others! 
(collation) */ + dtuple_t* tuple1, /* in: tuple 1 */ + dtuple_t* tuple2) /* in: tuple 2 */ +{ + dfield_t* field1; + dfield_t* field2; + ulint n_fields; + ulint i; + + ut_ad(tuple1 && tuple2); + ut_ad(tuple1->magic_n = DATA_TUPLE_MAGIC_N); + ut_ad(tuple2->magic_n = DATA_TUPLE_MAGIC_N); + ut_ad(dtuple_check_typed(tuple1)); + ut_ad(dtuple_check_typed(tuple2)); + + n_fields = dtuple_get_n_fields(tuple1); + + if (n_fields != dtuple_get_n_fields(tuple2)) { + + return(FALSE); + } + + for (i = 0; i < n_fields; i++) { + + field1 = dtuple_get_nth_field(tuple1, i); + field2 = dtuple_get_nth_field(tuple2, i); + + if (0 != cmp_dfield_dfield(field1, field2)) { + + return(FALSE); + } + } + + return(TRUE); +} + /************************************************************************* Creates a dtuple for use in MySQL. */ @@ -408,7 +456,7 @@ dtuple_convert_big_rec( ulint size; ulint n_fields; ulint longest; - ulint longest_i; + ulint longest_i = ULINT_MAX; ibool is_externally_stored; ulint i; ulint j; diff --git a/innobase/data/data0type.c b/innobase/data/data0type.c index 82c00a83fb2..5d0ddf3e887 100644 --- a/innobase/data/data0type.c +++ b/innobase/data/data0type.c @@ -28,7 +28,6 @@ dtype_validate( ut_a((type->mtype >= DATA_VARCHAR) && (type->mtype <= DATA_MYSQL)); if (type->mtype == DATA_SYS) { - ut_a(type->prtype >= DATA_ROW_ID); ut_a(type->prtype <= DATA_MIX_ID); } @@ -45,11 +44,10 @@ dtype_print( { ulint mtype; ulint prtype; - + ulint len; + ut_a(type); - printf("DATA TYPE: "); - mtype = type->mtype; prtype = type->prtype; if (mtype == DATA_VARCHAR) { @@ -65,8 +63,10 @@ dtype_print( } else if (mtype == DATA_SYS) { printf("DATA_SYS"); } else { - printf("unknown type %lu", mtype); + printf("type %lu", mtype); } + + len = type->len; if ((type->mtype == DATA_SYS) || (type->mtype == DATA_VARCHAR) @@ -74,8 +74,13 @@ dtype_print( printf(" "); if (prtype == DATA_ROW_ID) { printf("DATA_ROW_ID"); + len = DATA_ROW_ID_LEN; } else if (prtype == DATA_ROLL_PTR) { printf("DATA_ROLL_PTR"); + 
len = DATA_ROLL_PTR_LEN; + } else if (prtype == DATA_TRX_ID) { + printf("DATA_TRX_ID"); + len = DATA_TRX_ID_LEN; } else if (prtype == DATA_MIX_ID) { printf("DATA_MIX_ID"); } else if (prtype == DATA_ENGLISH) { @@ -83,9 +88,9 @@ dtype_print( } else if (prtype == DATA_FINNISH) { printf("DATA_FINNISH"); } else { - printf("unknown prtype %lu", mtype); + printf("prtype %lu", mtype); } } - printf("; len %lu prec %lu\n", type->len, type->prec); + printf(" len %lu prec %lu", len, type->prec); } diff --git a/innobase/dict/dict0crea.c b/innobase/dict/dict0crea.c index 478364fba8a..9d79983c9e5 100644 --- a/innobase/dict/dict0crea.c +++ b/innobase/dict/dict0crea.c @@ -17,9 +17,13 @@ Created 1/8/1996 Heikki Tuuri #include "page0page.h" #include "mach0data.h" #include "dict0boot.h" +#include "dict0dict.h" #include "que0que.h" #include "row0ins.h" +#include "row0mysql.h" #include "pars0pars.h" +#include "trx0roll.h" +#include "usr0sess.h" /********************************************************************* Based on a table object, this function builds the entry to be inserted @@ -1019,3 +1023,228 @@ function_exit: return(thr); } + +/******************************************************************** +Creates the foreign key constraints system tables inside InnoDB +at database creation or database start if they are not found or are +not of the right form. 
*/ + +ulint +dict_create_or_check_foreign_constraint_tables(void) +/*================================================*/ + /* out: DB_SUCCESS or error code */ +{ + dict_table_t* table1; + dict_table_t* table2; + que_thr_t* thr; + que_t* graph; + ulint error; + trx_t* trx; + char* str; + + mutex_enter(&(dict_sys->mutex)); + + table1 = dict_table_get_low("SYS_FOREIGN"); + table2 = dict_table_get_low("SYS_FOREIGN_COLS"); + + if (table1 && table2 + && UT_LIST_GET_LEN(table1->indexes) == 3 + && UT_LIST_GET_LEN(table2->indexes) == 1) { + + /* Foreign constraint system tables have already been + created, and they are ok */ + + mutex_exit(&(dict_sys->mutex)); + + return(DB_SUCCESS); + } + + trx = trx_allocate_for_mysql(); + + trx->op_info = "creating foreign key sys tables"; + + if (table1) { + fprintf(stderr, + "InnoDB: dropping incompletely created SYS_FOREIGN table\n"); + row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE); + } + + if (table2) { + fprintf(stderr, + "InnoDB: dropping incompletely created SYS_FOREIGN_COLS table\n"); + row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE); + } + + fprintf(stderr, + "InnoDB: creating foreign key constraint system tables\n"); + + /* NOTE: in dict_load_foreigns we use the fact that + there are 2 secondary indexes on SYS_FOREIGN, and they + are defined just like below */ + + str = + "PROCEDURE CREATE_FOREIGN_SYS_TABLES_PROC () IS\n" + "BEGIN\n" + "CREATE TABLE\n" + "SYS_FOREIGN(ID CHAR, FOR_NAME CHAR, REF_NAME CHAR, N_COLS INT);\n" + "CREATE UNIQUE CLUSTERED INDEX ID_IND ON SYS_FOREIGN (ID);\n" + "CREATE INDEX FOR_IND ON SYS_FOREIGN (FOR_NAME);\n" + "CREATE INDEX REF_IND ON SYS_FOREIGN (REF_NAME);\n" + "CREATE TABLE\n" + "SYS_FOREIGN_COLS(ID CHAR, POS INT, FOR_COL_NAME CHAR, REF_COL_NAME CHAR);\n" + "CREATE UNIQUE CLUSTERED INDEX ID_IND ON SYS_FOREIGN_COLS (ID, POS);\n" + "COMMIT WORK;\n" + "END;\n"; + + graph = pars_sql(str); + + ut_a(graph); + + graph->trx = trx; + trx->graph = NULL; + + graph->fork_type = 
QUE_FORK_MYSQL_INTERFACE; + + ut_a(thr = que_fork_start_command(graph, SESS_COMM_EXECUTE, 0)); + + que_run_threads(thr); + + error = trx->error_state; + + if (error != DB_SUCCESS) { + ut_a(error == DB_OUT_OF_FILE_SPACE); + + fprintf(stderr, "InnoDB: creation failed\n"); + fprintf(stderr, "InnoDB: tablespace is full\n"); + fprintf(stderr, + "InnoDB: dropping incompletely created SYS_FOREIGN tables\n"); + + row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE); + row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE); + + error = DB_MUST_GET_MORE_FILE_SPACE; + } + + que_graph_free(graph); + + trx->op_info = ""; + + trx_free_for_mysql(trx); + + if (error == DB_SUCCESS) { + fprintf(stderr, + "InnoDB: foreign key constraint system tables created\n"); + } + + mutex_exit(&(dict_sys->mutex)); + + return(error); +} + +/************************************************************************ +Adds foreign key definitions to data dictionary tables in the database. */ + +ulint +dict_create_add_foreigns_to_dictionary( +/*===================================*/ + /* out: error code or DB_SUCCESS */ + dict_table_t* table, /* in: table */ + trx_t* trx) /* in: transaction */ +{ + dict_foreign_t* foreign; + que_thr_t* thr; + que_t* graph; + dulint id; + ulint len; + ulint error; + ulint i; + char buf2[50]; + char buf[10000]; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + if (NULL == dict_table_get_low("SYS_FOREIGN")) { + fprintf(stderr, + "InnoDB: table SYS_FOREIGN not found from internal data dictionary\n"); + return(DB_ERROR); + } + + foreign = UT_LIST_GET_FIRST(table->foreign_list); +loop: + if (foreign == NULL) { + + return(DB_SUCCESS); + } + + /* Build an InnoDB stored procedure which will insert the necessary + rows to SYS_FOREIGN and SYS_FOREIGN_COLS */ + + len = 0; + + len += sprintf(buf, + "PROCEDURE ADD_FOREIGN_DEFS_PROC () IS\n" + "BEGIN\n"); + + /* We allocate the new id from the sequence of table id's */ + id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID); + + sprintf(buf2, 
"%lu_%lu", ut_dulint_get_high(id), + ut_dulint_get_low(id)); + foreign->id = mem_heap_alloc(foreign->heap, ut_strlen(buf2) + 1); + ut_memcpy(foreign->id, buf2, ut_strlen(buf2) + 1); + + len += sprintf(buf + len, + "INSERT INTO SYS_FOREIGN VALUES('%lu_%lu', '%s', '%s', %lu);\n", + ut_dulint_get_high(id), + ut_dulint_get_low(id), + table->name, + foreign->referenced_table_name, + foreign->n_fields); + + for (i = 0; i < foreign->n_fields; i++) { + + len += sprintf(buf + len, + "INSERT INTO SYS_FOREIGN_COLS VALUES('%lu_%lu', %lu, '%s', '%s');\n", + ut_dulint_get_high(id), + ut_dulint_get_low(id), + i, + foreign->foreign_col_names[i], + foreign->referenced_col_names[i]); + } + + len += sprintf(buf + len,"COMMIT WORK;\nEND;\n"); + + graph = pars_sql(buf); + + ut_a(graph); + + graph->trx = trx; + trx->graph = NULL; + + graph->fork_type = QUE_FORK_MYSQL_INTERFACE; + + ut_a(thr = que_fork_start_command(graph, SESS_COMM_EXECUTE, 0)); + + que_run_threads(thr); + + error = trx->error_state; + + que_graph_free(graph); + + if (error != DB_SUCCESS) { + ut_a(error == DB_OUT_OF_FILE_SPACE); + + fprintf(stderr, "InnoDB: foreign constraint creation failed\n"); + fprintf(stderr, "InnoDB: tablespace is full\n"); + + trx_general_rollback_for_mysql(trx, FALSE, NULL); + + error = DB_MUST_GET_MORE_FILE_SPACE; + + return(error); + } + + foreign = UT_LIST_GET_NEXT(foreign_list, foreign); + + goto loop; +} diff --git a/innobase/dict/dict0dict.c b/innobase/dict/dict0dict.c index 10d93fc6ecf..e0a7fd327a5 100644 --- a/innobase/dict/dict0dict.c +++ b/innobase/dict/dict0dict.c @@ -17,6 +17,7 @@ Created 1/8/1996 Heikki Tuuri #include "mach0data.h" #include "dict0boot.h" #include "dict0mem.h" +#include "dict0crea.h" #include "trx0undo.h" #include "btr0btr.h" #include "btr0cur.h" @@ -24,10 +25,12 @@ Created 1/8/1996 Heikki Tuuri #include "pars0pars.h" #include "pars0sym.h" #include "que0que.h" - +#include "rem0cmp.h" dict_sys_t* dict_sys = NULL; /* the dictionary system */ +rw_lock_t 
dict_foreign_key_check_lock; + #define DICT_HEAP_SIZE 100 /* initial memory heap size when creating a table or index object */ #define DICT_POOL_PER_PROCEDURE_HASH 512 /* buffer pool max size per stored @@ -137,12 +140,12 @@ dict_tree_find_index_low( dict_tree_t* tree, /* in: index tree */ rec_t* rec); /* in: record for which to find correct index */ /************************************************************************** -Prints a table data. */ +Removes a foreign constraint struct from the dictionary cache. */ static void -dict_table_print_low( -/*=================*/ - dict_table_t* table); /* in: table */ +dict_foreign_remove_from_cache( +/*===========================*/ + dict_foreign_t* foreign); /* in, own: foreign constraint */ /************************************************************************** Prints a column data. */ static @@ -164,6 +167,13 @@ void dict_field_print_low( /*=================*/ dict_field_t* field); /* in: field */ +/************************************************************************* +Frees a foreign key struct. */ +static +void +dict_foreign_free( +/*==============*/ + dict_foreign_t* foreign); /* in, own: foreign key struct */ /************************************************************************ Reserves the dictionary system mutex for MySQL. 
*/ @@ -353,7 +363,8 @@ dict_table_get_on_id( { dict_table_t* table; - if (ut_dulint_cmp(table_id, DICT_FIELDS_ID) <= 0) { + if (ut_dulint_cmp(table_id, DICT_FIELDS_ID) <= 0 + || trx->dict_operation) { /* It is a system table which will always exist in the table cache: we avoid acquiring the dictionary mutex, because if we are doing a rollback to handle an error in TABLE @@ -415,6 +426,10 @@ dict_init(void) dict_sys->size = 0; UT_LIST_INIT(dict_sys->table_LRU); + + rw_lock_create(&dict_foreign_key_check_lock); + rw_lock_set_level(&dict_foreign_key_check_lock, + SYNC_FOREIGN_KEY_CHECK); } /************************************************************************** @@ -535,6 +550,41 @@ dict_table_add_to_cache( } /************************************************************************** +Looks for an index with the given id. NOTE that we do not reserve +the dictionary mutex: this function is for emergency purposes like +printing info of a corrupt database page! */ + +dict_index_t* +dict_index_find_on_id_low( +/*======================*/ + /* out: index or NULL if not found from cache */ + dulint id) /* in: index id */ +{ + dict_table_t* table; + dict_index_t* index; + + table = UT_LIST_GET_FIRST(dict_sys->table_LRU); + + while (table) { + index = dict_table_get_first_index(table); + + while (index) { + if (0 == ut_dulint_cmp(id, index->tree->id)) { + /* Found */ + + return(index); + } + + index = dict_table_get_next_index(index); + } + + table = UT_LIST_GET_NEXT(table_LRU, table); + } + + return(NULL); +} + +/************************************************************************** Renames a table object. 
*/ ibool @@ -544,10 +594,12 @@ dict_table_rename_in_cache( dict_table_t* table, /* in: table */ char* new_name) /* in: new name */ { - ulint fold; - ulint old_size; - char* name_buf; - ulint i; + dict_foreign_t* foreign; + dict_index_t* index; + ulint fold; + ulint old_size; + char* name_buf; + ulint i; ut_ad(table); ut_ad(mutex_own(&(dict_sys->mutex))); @@ -589,6 +641,55 @@ dict_table_rename_in_cache( dict_sys->size += (mem_heap_get_size(table->heap) - old_size); + /* Update the table_name field in indexes */ + index = dict_table_get_first_index(table); + + while (index != NULL) { + index->table_name = table->name; + + index = dict_table_get_next_index(index); + } + + /* Update the table name fields in foreign constraints */ + + foreign = UT_LIST_GET_FIRST(table->foreign_list); + + while (foreign != NULL) { + if (ut_strlen(foreign->foreign_table_name) < + ut_strlen(table->name)) { + /* Allocate a longer name buffer; + TODO: store buf len to save memory */ + foreign->foreign_table_name = mem_heap_alloc( + foreign->heap, + ut_strlen(table->name) + 1); + } + + ut_memcpy(foreign->foreign_table_name, table->name, + ut_strlen(table->name) + 1); + foreign->foreign_table_name[ut_strlen(table->name)] = '\0'; + + foreign = UT_LIST_GET_NEXT(foreign_list, foreign); + } + + foreign = UT_LIST_GET_FIRST(table->referenced_list); + + while (foreign != NULL) { + if (ut_strlen(foreign->referenced_table_name) < + ut_strlen(table->name)) { + /* Allocate a longer name buffer; + TODO: store buf len to save memory */ + foreign->referenced_table_name = mem_heap_alloc( + foreign->heap, + ut_strlen(table->name) + 1); + } + + ut_memcpy(foreign->referenced_table_name, table->name, + ut_strlen(table->name) + 1); + foreign->referenced_table_name[ut_strlen(table->name)] = '\0'; + + foreign = UT_LIST_GET_NEXT(referenced_list, foreign); + } + return(TRUE); } @@ -600,6 +701,7 @@ dict_table_remove_from_cache( /*=========================*/ dict_table_t* table) /* in, own: table */ { + dict_foreign_t* 
foreign; dict_index_t* index; ulint size; ulint i; @@ -610,6 +712,29 @@ dict_table_remove_from_cache( /* printf("Removing table %s from dictionary cache\n", table->name); */ + /* Remove the foreign constraints from the cache */ + foreign = UT_LIST_GET_LAST(table->foreign_list); + + while (foreign != NULL) { + ut_a(0 == ut_strcmp(foreign->foreign_table_name, table->name)); + + dict_foreign_remove_from_cache(foreign); + foreign = UT_LIST_GET_LAST(table->foreign_list); + } + + /* Reset table field in referencing constraints */ + + foreign = UT_LIST_GET_FIRST(table->referenced_list); + + while (foreign != NULL) { + ut_a(0 == ut_strcmp(foreign->referenced_table_name, + table->name)); + foreign->referenced_table = NULL; + foreign->referenced_index = NULL; + + foreign = UT_LIST_GET_NEXT(referenced_list, foreign); + } + /* Remove the indexes from the cache */ index = UT_LIST_GET_LAST(table->indexes); @@ -856,6 +981,21 @@ dict_index_add_to_cache( new_index->tree = tree; } + if (!(new_index->type & DICT_UNIVERSAL)) { + + new_index->stat_n_diff_key_vals = + mem_heap_alloc(new_index->heap, + (1 + dict_index_get_n_unique(new_index)) + * sizeof(ib_longlong)); + /* Give some sensible values to stat_n_... in case we do + not calculate statistics quickly enough */ + + for (i = 0; i <= dict_index_get_n_unique(new_index); i++) { + + new_index->stat_n_diff_key_vals[i] = 100; + } + } + /* Add the index to the list of indexes stored in the tree */ UT_LIST_ADD_LAST(tree_indexes, tree->tree_indexes, new_index); @@ -1290,6 +1430,654 @@ dict_index_build_internal_non_clust( return(new_index); } +/*====================== FOREIGN KEY PROCESSING ========================*/ + +/************************************************************************* +Frees a foreign key struct. 
*/ +static +void +dict_foreign_free( +/*==============*/ + dict_foreign_t* foreign) /* in, own: foreign key struct */ +{ + mem_heap_free(foreign->heap); +} + +/************************************************************************** +Removes a foreign constraint struct from the dictionary cache. */ +static +void +dict_foreign_remove_from_cache( +/*===========================*/ + dict_foreign_t* foreign) /* in, own: foreign constraint */ +{ + ut_ad(mutex_own(&(dict_sys->mutex))); + ut_a(foreign); + + if (foreign->referenced_table) { + UT_LIST_REMOVE(referenced_list, + foreign->referenced_table->referenced_list, foreign); + } + + if (foreign->foreign_table) { + UT_LIST_REMOVE(foreign_list, + foreign->foreign_table->foreign_list, foreign); + } + + dict_foreign_free(foreign); +} + +/************************************************************************** +Looks for the foreign constraint from the foreign and referenced lists +of a table. */ +static +dict_foreign_t* +dict_foreign_find( +/*==============*/ + /* out: foreign constraint */ + dict_table_t* table, /* in: table object */ + char* id) /* in: foreign constraint id */ +{ + dict_foreign_t* foreign; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + foreign = UT_LIST_GET_FIRST(table->foreign_list); + + while (foreign) { + if (ut_strcmp(id, foreign->id) == 0) { + + return(foreign); + } + + foreign = UT_LIST_GET_NEXT(foreign_list, foreign); + } + + foreign = UT_LIST_GET_FIRST(table->referenced_list); + + while (foreign) { + if (ut_strcmp(id, foreign->id) == 0) { + + return(foreign); + } + + foreign = UT_LIST_GET_NEXT(referenced_list, foreign); + } + + return(NULL); +} + +/************************************************************************* +Tries to find an index whose first fields are the columns in the array, +in the same order. 
*/ +static +dict_index_t* +dict_foreign_find_index( +/*====================*/ + /* out: matching index, NULL if not found */ + dict_table_t* table, /* in: table */ + char** columns,/* in: array of column names */ + ulint n_cols, /* in: number of columns */ + dict_index_t* types_idx)/* in: NULL or an index to whose types the + column types must match */ +{ + dict_index_t* index; + char* col_name; + ulint i; + + index = dict_table_get_first_index(table); + + while (index != NULL) { + if (dict_index_get_n_fields(index) >= n_cols) { + + for (i = 0; i < n_cols; i++) { + col_name = dict_index_get_nth_field(index, i) + ->col->name; + if (ut_strlen(columns[i]) != + ut_strlen(col_name) + || 0 != ut_memcmp(columns[i], + col_name, + ut_strlen(col_name))) { + break; + } + + if (types_idx && !cmp_types_are_equal( + dict_index_get_nth_type(index, i), + dict_index_get_nth_type(types_idx, i))) { + + break; + } + } + + if (i == n_cols) { + /* We found a matching index */ + + return(index); + } + } + + index = dict_table_get_next_index(index); + } + + return(NULL); +} + +/************************************************************************** +Adds a foreign key constraint object to the dictionary cache. May free +the object if there already is an object with the same identifier in. +At least one of the foreign table and the referenced table must already +be in the dictionary cache! 
*/ + +ulint +dict_foreign_add_to_cache( +/*======================*/ + /* out: DB_SUCCESS or error code */ + dict_foreign_t* foreign) /* in, own: foreign key constraint */ +{ + dict_table_t* for_table; + dict_table_t* ref_table; + dict_foreign_t* for_in_cache = NULL; + dict_index_t* index; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + for_table = dict_table_check_if_in_cache_low( + foreign->foreign_table_name); + + ref_table = dict_table_check_if_in_cache_low( + foreign->referenced_table_name); + ut_a(for_table || ref_table); + + if (for_table) { + for_in_cache = dict_foreign_find(for_table, foreign->id); + } + + if (!for_in_cache && ref_table) { + for_in_cache = dict_foreign_find(ref_table, foreign->id); + } + + if (for_in_cache) { + /* Free the foreign object */ + mem_heap_free(foreign->heap); + } else { + for_in_cache = foreign; + } + + if (for_in_cache->referenced_table == NULL && ref_table) { + index = dict_foreign_find_index(ref_table, + for_in_cache->referenced_col_names, + for_in_cache->n_fields, + for_in_cache->foreign_index); + + if (index == NULL) { + if (for_in_cache == foreign) { + mem_heap_free(foreign->heap); + } + return(DB_CANNOT_ADD_CONSTRAINT); + } + + for_in_cache->referenced_table = ref_table; + for_in_cache->referenced_index = index; + UT_LIST_ADD_LAST(referenced_list, + ref_table->referenced_list, + for_in_cache); + } + + if (for_in_cache->foreign_table == NULL && for_table) { + index = dict_foreign_find_index(for_table, + for_in_cache->foreign_col_names, + for_in_cache->n_fields, + for_in_cache->referenced_index); + + if (index == NULL) { + if (for_in_cache == foreign) { + mem_heap_free(foreign->heap); + } + + return(DB_CANNOT_ADD_CONSTRAINT); + } + + for_in_cache->foreign_table = for_table; + for_in_cache->foreign_index = index; + UT_LIST_ADD_LAST(foreign_list, + for_table->foreign_list, + for_in_cache); + } + + return(DB_SUCCESS); +} + +/************************************************************************* +Scans from pointer onwards. 
Stops if it is at the start of a copy of +'string' where characters are compared without case sensitivity. Stops +also at '\0'. */ +static +char* +dict_scan_to( +/*=========*/ + + char* ptr, /* in: scan from */ + char* string) /* in: look for this */ +{ + ibool success; + ulint i; +loop: + if (*ptr == '\0') { + return(ptr); + } + + success = TRUE; + + for (i = 0; i < ut_strlen(string); i++) { + if (toupper((ulint)(ptr[i])) != toupper((ulint)(string[i]))) { + success = FALSE; + + break; + } + } + + if (success) { + + return(ptr); + } + + ptr++; + + goto loop; +} + +/************************************************************************* +Accepts a specified string. Comparisons are case-insensitive. */ +static +char* +dict_accept( +/*========*/ + /* out: if string was accepted, the pointer + is moved after that, else ptr is returned */ + char* ptr, /* in: scan from this */ + char* string, /* in: accept only this string as the next + non-whitespace string */ + ibool* success)/* out: TRUE if accepted */ +{ + char* old_ptr = ptr; + char* old_ptr2; + + *success = FALSE; + + while (isspace(*ptr)) { + ptr++; + } + + old_ptr2 = ptr; + + ptr = dict_scan_to(ptr, string); + + if (*ptr == '\0' || old_ptr2 != ptr) { + return(old_ptr); + } + + *success = TRUE; + + return(ptr + ut_strlen(string)); +} + +/************************************************************************* +Tries to scan a column name. 
*/ +static +char* +dict_scan_col( +/*==========*/ + /* out: scanned to */ + char* ptr, /* in: scanned to */ + ibool* success,/* out: TRUE if success */ + dict_table_t* table, /* in: table in which the column is */ + dict_col_t** column, /* out: pointer to column if success */ + char** column_name)/* out: pointer to column->name if + success */ +{ + dict_col_t* col; + char* old_ptr; + ulint i; + + *success = FALSE; + + while (isspace(*ptr)) { + ptr++; + } + + if (*ptr == '\0') { + + return(ptr); + } + + old_ptr = ptr; + + while (!isspace(*ptr) && *ptr != ',' && *ptr != ')') { + ptr++; + } + + for (i = 0; i < dict_table_get_n_cols(table); i++) { + + col = dict_table_get_nth_col(table, i); + + if (ut_strlen(col->name) == (ulint)(ptr - old_ptr) + && 0 == ut_memcmp(col->name, old_ptr, + (ulint)(ptr - old_ptr))) { + + /* Found */ + + *success = TRUE; + *column = col; + *column_name = col->name; + + break; + } + } + + return(ptr); +} + +/************************************************************************* +Scans the referenced table name from an SQL string. 
*/ +static +char* +dict_scan_table_name( +/*=================*/ + /* out: scanned to */ + char* ptr, /* in: scanned to */ + dict_table_t** table, /* out: table object or NULL if error */ + char* name) /* in: foreign key table name */ +{ + char* dot_ptr = NULL; + char* old_ptr; + ulint i; + char second_table_name[10000]; + + *table = NULL; + + while (isspace(*ptr)) { + ptr++; + } + + if (*ptr == '\0') { + + return(ptr); + } + + old_ptr = ptr; + + while (!isspace(*ptr) && *ptr != '(') { + if (*ptr == '.') { + dot_ptr = ptr; + } + + ptr++; + } + + if (ptr - old_ptr > 9000) { + return(old_ptr); + } + + if (dot_ptr == NULL) { + /* Copy the database name from 'name' to the start */ + for (i = 0;; i++) { + second_table_name[i] = name[i]; + if (name[i] == '/') { + i++; + break; + } + } + + ut_memcpy(second_table_name + i, old_ptr, ptr - old_ptr); + second_table_name[i + (ptr - old_ptr)] = '\0'; + } else { + ut_memcpy(second_table_name, old_ptr, ptr - old_ptr); + second_table_name[dot_ptr - old_ptr] = '/'; + second_table_name[ptr - old_ptr] = '\0'; + } + + *table = dict_table_get_low(second_table_name); + + return(ptr); +} + +/************************************************************************* +Returns the number of opening brackets '(' subtracted by the number +of closing brackets ')' between string and ptr. */ +static +int +dict_bracket_count( +/*===============*/ + /* out: bracket count */ + char* string, /* in: start of string */ + char* ptr) /* in: end of string */ +{ + int count = 0; + + while (string != ptr) { + if (*string == '(') { + count++; + } + if (*string == ')') { + count--; + } + + string++; + } + + return(count); +} + +/************************************************************************* +Scans a table create SQL string and adds to the data dictionary the foreign +key constraints declared in the string. This function should be called after +the indexes for a table have been created. 
Each foreign key constraint must +be accompanied with indexes in both participating tables. The indexes are +allowed to contain more fields than mentioned in the constraint. */ + +ulint +dict_create_foreign_constraints( +/*============================*/ + /* out: error code or DB_SUCCESS */ + trx_t* trx, /* in: transaction */ + char* sql_string, /* in: table create statement where + foreign keys are declared like: + FOREIGN KEY (a, b) REFERENCES table2(c, d), + table2 can be written also with the database + name before it: test.table2; the default + database id the database of parameter name */ + char* name) /* in: table full name in the normalized form + database_name/table_name */ +{ + dict_table_t* table; + dict_table_t* referenced_table; + dict_index_t* index; + dict_foreign_t* foreign; + char* ptr = sql_string; + ibool success; + ulint error; + ulint i; + dict_col_t* columns[1000]; + char* column_names[1000]; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + table = dict_table_get_low(name); + + if (table == NULL) { + return(DB_ERROR); + } +loop: + ptr = dict_scan_to(ptr, "FOREIGN"); + + if (*ptr == '\0' || dict_bracket_count(sql_string, ptr) != 1) { + + /* The following call adds the foreign key constraints + to the data dictionary system tables on disk */ + + error = dict_create_add_foreigns_to_dictionary(table, trx); + + return(error); + } + + ptr = dict_accept(ptr, "FOREIGN", &success); + + if (!isspace(*ptr)) { + return(DB_CANNOT_ADD_CONSTRAINT); + } + + ptr = dict_accept(ptr, "KEY", &success); + + if (!success) { + goto loop; + } + + ptr = dict_accept(ptr, "(", &success); + + if (!success) { + goto loop; + } + + i = 0; + + /* Scan the columns in the first list */ +col_loop1: + ptr = dict_scan_col(ptr, &success, table, columns + i, + column_names + i); + if (!success) { + return(DB_CANNOT_ADD_CONSTRAINT); + } + + i++; + + ptr = dict_accept(ptr, ",", &success); + + if (success) { + goto col_loop1; + } + + ptr = dict_accept(ptr, ")", &success); + + if 
(!success) { + return(DB_CANNOT_ADD_CONSTRAINT); + } + + /* Try to find an index which contains the columns + as the first fields and in the right order */ + + index = dict_foreign_find_index(table, column_names, i, NULL); + + if (!index) { + return(DB_CANNOT_ADD_CONSTRAINT); + } + + ptr = dict_accept(ptr, "REFERENCES", &success); + + if (!success || !isspace(*ptr)) { + return(DB_CANNOT_ADD_CONSTRAINT); + } + + /* Let us create a constraint struct */ + + foreign = dict_mem_foreign_create(); + + foreign->foreign_table = table; + foreign->foreign_table_name = table->name; + foreign->foreign_index = index; + foreign->n_fields = i; + foreign->foreign_col_names = mem_heap_alloc(foreign->heap, + i * sizeof(void*)); + for (i = 0; i < foreign->n_fields; i++) { + foreign->foreign_col_names[i] = mem_heap_alloc(foreign->heap, + 1 + ut_strlen(columns[i]->name)); + ut_memcpy(foreign->foreign_col_names[i], columns[i]->name, + 1 + ut_strlen(columns[i]->name)); + } + + ptr = dict_scan_table_name(ptr, &referenced_table, name); + + if (!referenced_table) { + dict_foreign_free(foreign); + + return(DB_CANNOT_ADD_CONSTRAINT); + } + + ptr = dict_accept(ptr, "(", &success); + + if (!success) { + dict_foreign_free(foreign); + return(DB_CANNOT_ADD_CONSTRAINT); + } + + /* Scan the columns in the second list */ + i = 0; + +col_loop2: + ptr = dict_scan_col(ptr, &success, referenced_table, columns + i, + column_names + i); + i++; + + if (!success) { + dict_foreign_free(foreign); + return(DB_CANNOT_ADD_CONSTRAINT); + } + + ptr = dict_accept(ptr, ",", &success); + + if (success) { + goto col_loop2; + } + + ptr = dict_accept(ptr, ")", &success); + + if (!success || foreign->n_fields != i) { + dict_foreign_free(foreign); + + return(DB_CANNOT_ADD_CONSTRAINT); + } + + /* Try to find an index which contains the columns as the first fields + and in the right order, and the types are the same as in + foreign->foreign_index */ + + index = dict_foreign_find_index(referenced_table, column_names, i, + 
foreign->foreign_index); + + if (!index) { + dict_foreign_free(foreign); + return(DB_CANNOT_ADD_CONSTRAINT); + } + + foreign->referenced_index = index; + foreign->referenced_table = referenced_table; + + foreign->referenced_table_name = mem_heap_alloc(foreign->heap, + 1 + ut_strlen(referenced_table->name)); + + ut_memcpy(foreign->referenced_table_name, referenced_table->name, + 1 + ut_strlen(referenced_table->name)); + + foreign->referenced_col_names = mem_heap_alloc(foreign->heap, + i * sizeof(void*)); + for (i = 0; i < foreign->n_fields; i++) { + foreign->referenced_col_names[i] + = mem_heap_alloc(foreign->heap, + 1 + ut_strlen(columns[i]->name)); + ut_memcpy( + foreign->referenced_col_names[i], columns[i]->name, + 1 + ut_strlen(columns[i]->name)); + } + + /* We found an ok constraint definition: add to the lists */ + + UT_LIST_ADD_LAST(foreign_list, table->foreign_list, foreign); + UT_LIST_ADD_LAST(referenced_list, referenced_table->referenced_list, + foreign); + goto loop; +} + +/*==================== END OF FOREIGN KEY PROCESSING ====================*/ + /************************************************************************** Adds a stored procedure object to the dictionary cache. */ @@ -1733,77 +2521,127 @@ dict_tree_build_data_tuple( } /************************************************************************* -Calculates new estimates for table and index statistics. The statistics -are used in query optimization. */ +Calculates the minimum record length in an index. 
*/ -void -dict_update_statistics( -/*===================*/ - dict_table_t* table) /* in: table */ +ulint +dict_index_calc_min_rec_len( +/*========================*/ + dict_index_t* index) /* in: index */ { - mem_heap_t* heap; - dict_index_t* index; - dtuple_t* start; - dtuple_t* end; - ulint n_rows; - ulint n_vals; - ulint size; - ulint sum_of_index_sizes = 0; - - /* Estimate the number of records in the clustered index */ - index = dict_table_get_first_index(table); - - heap = mem_heap_create(500); - - start = dtuple_create(heap, 0); - end = dtuple_create(heap, 0); + ulint sum = 0; + ulint i; - n_rows = btr_estimate_n_rows_in_range(index, start, PAGE_CUR_G, - end, PAGE_CUR_L); - mem_heap_free(heap); + for (i = 0; i < dict_index_get_n_fields(index); i++) { + sum += dtype_get_fixed_size(dict_index_get_nth_type(index, i)); + } - if (n_rows > 0) { - /* For small tables our estimate function tends to give - values 1 too big */ - n_rows--; + if (sum > 127) { + sum += 2 * dict_index_get_n_fields(index); + } else { + sum += dict_index_get_n_fields(index); } - mutex_enter(&(dict_sys->mutex)); + sum += REC_N_EXTRA_BYTES; - table->stat_last_estimate_counter = table->stat_modif_counter; - table->stat_n_rows = n_rows; + return(sum); +} - mutex_exit(&(dict_sys->mutex)); +/************************************************************************* +Calculates new estimates for table and index statistics. The statistics +are used in query optimization. 
*/ + +void +dict_update_statistics_low( +/*=======================*/ + dict_table_t* table, /* in: table */ + ibool has_dict_mutex) /* in: TRUE if the caller has the + dictionary mutex */ +{ + dict_index_t* index; + ulint size; + ulint sum_of_index_sizes = 0; /* Find out the sizes of the indexes and how many different values for the key they approximately have */ - + + index = dict_table_get_first_index(table); + while (index) { - n_vals = btr_estimate_number_of_different_key_vals(index); size = btr_get_size(index, BTR_TOTAL_SIZE); + index->stat_index_size = size; + sum_of_index_sizes += size; - mutex_enter(&(dict_sys->mutex)); + size = btr_get_size(index, BTR_N_LEAF_PAGES); - index->stat_n_diff_key_vals = n_vals; - index->stat_index_size = size; + if (size == 0) { + /* The root node of the tree is a leaf */ + size = 1; + } - mutex_exit(&(dict_sys->mutex)); + index->stat_n_leaf_pages = size; + + btr_estimate_number_of_different_key_vals(index); index = dict_table_get_next_index(index); } index = dict_table_get_first_index(table); + table->stat_n_rows = index->stat_n_diff_key_vals[ + dict_index_get_n_unique(index)]; + table->stat_clustered_index_size = index->stat_index_size; table->stat_sum_of_other_index_sizes = sum_of_index_sizes - - index->stat_index_size; + - index->stat_index_size; table->stat_last_estimate_counter = table->stat_modif_counter; } +/************************************************************************* +Calculates new estimates for table and index statistics. The statistics +are used in query optimization. */ + +void +dict_update_statistics( +/*===================*/ + dict_table_t* table) /* in: table */ +{ + dict_update_statistics_low(table, FALSE); +} + +/************************************************************************** +Prints info of a foreign key constraint. 
*/ +static +void +dict_foreign_print_low( +/*===================*/ + dict_foreign_t* foreign) /* in: foreign key constraint */ +{ + ulint i; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + printf(" FOREIGN KEY CONSTRAINT %s: %s (", foreign->id, + foreign->foreign_table_name); + + for (i = 0; i < foreign->n_fields; i++) { + printf(" %s", foreign->foreign_col_names[i]); + } + + printf(" )\n"); + + printf(" REFERENCES %s (", foreign->referenced_table_name); + + for (i = 0; i < foreign->n_fields; i++) { + printf(" %s", foreign->referenced_col_names[i]); + } + + printf(" )\n"); +} + /************************************************************************** Prints a table data. */ @@ -1839,31 +2677,57 @@ dict_table_print_by_name( /************************************************************************** Prints a table data. */ -static + void dict_table_print_low( /*=================*/ dict_table_t* table) /* in: table */ { - ulint i; dict_index_t* index; + dict_foreign_t* foreign; + ulint i; ut_ad(mutex_own(&(dict_sys->mutex))); + dict_update_statistics_low(table, TRUE); + printf("--------------------------------------\n"); - printf("TABLE INFO: name %s, columns %lu, indexes %lu\n", table->name, - table->n_cols, UT_LIST_GET_LEN(table->indexes)); - for (i = 0; i < table->n_cols; i++) { - printf(" "); + printf( + "TABLE: name %s, id %lu %lu, columns %lu, indexes %lu, appr.rows %lu\n", + table->name, + ut_dulint_get_high(table->id), + ut_dulint_get_low(table->id), + table->n_cols, UT_LIST_GET_LEN(table->indexes), + (ulint)table->stat_n_rows); + printf(" COLUMNS: "); + + for (i = 0; i < table->n_cols - 1; i++) { dict_col_print_low(dict_table_get_nth_col(table, i)); + printf("; "); } + printf("\n"); + index = UT_LIST_GET_FIRST(table->indexes); while (index != NULL) { dict_index_print_low(index); index = UT_LIST_GET_NEXT(indexes, index); } + + foreign = UT_LIST_GET_FIRST(table->foreign_list); + + while (foreign != NULL) { + dict_foreign_print_low(foreign); + foreign = 
UT_LIST_GET_NEXT(foreign_list, foreign); + } + + foreign = UT_LIST_GET_FIRST(table->referenced_list); + + while (foreign != NULL) { + dict_foreign_print_low(foreign); + foreign = UT_LIST_GET_NEXT(referenced_list, foreign); + } } /************************************************************************** @@ -1879,7 +2743,7 @@ dict_col_print_low( ut_ad(mutex_own(&(dict_sys->mutex))); type = dict_col_get_type(col); - printf("COLUMN: name %s; ", col->name); + printf("%s: ", col->name); dtype_print(type); } @@ -1892,28 +2756,47 @@ dict_index_print_low( /*=================*/ dict_index_t* index) /* in: index */ { - ulint i; dict_tree_t* tree; + ib_longlong n_vals; + ulint i; ut_ad(mutex_own(&(dict_sys->mutex))); tree = index->tree; - + + if (index->n_user_defined_cols > 0) { + n_vals = index->stat_n_diff_key_vals[ + index->n_user_defined_cols]; + } else { + n_vals = index->stat_n_diff_key_vals[1]; + } + + printf( - "INDEX INFO: name %s, table name %s, fields %lu, type %lu\n", - index->name, index->table_name, index->n_fields, - index->type); - printf(" root node: space %lu, page number %lu\n", - tree->space, tree->page); + " INDEX: name %s, table name %s, id %lu %lu, fields %lu/%lu, type %lu\n", + index->name, index->table_name, + ut_dulint_get_high(tree->id), + ut_dulint_get_low(tree->id), + index->n_user_defined_cols, + index->n_fields, index->type); + printf( + " root page %lu, appr.key vals %lu, leaf pages %lu, size pages %lu\n", + tree->page, + (ulint)n_vals, + index->stat_n_leaf_pages, + index->stat_index_size); + printf(" FIELDS: "); + for (i = 0; i < index->n_fields; i++) { - printf(" "); dict_field_print_low(dict_index_get_nth_field(index, i)); } - btr_print_size(tree); + printf("\n"); + +/* btr_print_size(tree); */ - btr_print_tree(tree, 7); +/* btr_print_tree(tree, 7); */ } /************************************************************************** @@ -1926,6 +2809,5 @@ dict_field_print_low( { ut_ad(mutex_own(&(dict_sys->mutex))); - printf("FIELD: column name 
%s, order criterion %lu\n", field->name, - field->order); + printf(" %s", field->name); } diff --git a/innobase/dict/dict0load.c b/innobase/dict/dict0load.c index be16988086a..dcdc9ee01cd 100644 --- a/innobase/dict/dict0load.c +++ b/innobase/dict/dict0load.c @@ -48,8 +48,171 @@ dict_load_fields( /************************************************************************ +Finds the first table name in the given database. */ + +char* +dict_get_first_table_name_in_db( +/*============================*/ + /* out, own: table name, NULL if does not exist; + the caller must free the memory in the string! */ + char* name) /* in: database name which ends to '/' */ +{ + dict_table_t* sys_tables; + btr_pcur_t pcur; + dict_index_t* sys_index; + dtuple_t* tuple; + mem_heap_t* heap; + dfield_t* dfield; + rec_t* rec; + byte* field; + ulint len; + char* table_name; + mtr_t mtr; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + heap = mem_heap_create(1000); + + mtr_start(&mtr); + + sys_tables = dict_table_get_low("SYS_TABLES"); + sys_index = UT_LIST_GET_FIRST(sys_tables->indexes); + + tuple = dtuple_create(heap, 1); + dfield = dtuple_get_nth_field(tuple, 0); + + dfield_set_data(dfield, name, ut_strlen(name)); + dict_index_copy_types(tuple, sys_index, 1); + + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); +loop: + rec = btr_pcur_get_rec(&pcur); + + if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) { + /* Not found */ + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap); + + return(NULL); + } + + field = rec_get_nth_field(rec, 0, &len); + + if (len < strlen(name) + || ut_memcmp(name, field, strlen(name)) != 0) { + /* Not found */ + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap); + + return(NULL); + } + + if (!rec_get_deleted_flag(rec)) { + + /* We found one */ + + table_name = mem_alloc(len + 1); + ut_memcpy(table_name, field, len); + table_name[len] = '\0'; + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + 
mem_heap_free(heap); + + return(table_name); + } + + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + + goto loop; +} + +/************************************************************************ +Prints to the standard output information on all tables found in the data +dictionary system table. */ + +void +dict_print(void) +/*============*/ +{ + dict_table_t* sys_tables; + dict_index_t* sys_index; + dict_table_t* table; + btr_pcur_t pcur; + rec_t* rec; + byte* field; + ulint len; + char table_name[10000]; + mtr_t mtr; + + mutex_enter(&(dict_sys->mutex)); + + mtr_start(&mtr); + + sys_tables = dict_table_get_low("SYS_TABLES"); + sys_index = UT_LIST_GET_FIRST(sys_tables->indexes); + + btr_pcur_open_at_index_side(TRUE, sys_index, BTR_SEARCH_LEAF, &pcur, + TRUE, &mtr); +loop: + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + + rec = btr_pcur_get_rec(&pcur); + + if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) { + /* end of index */ + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + + mutex_exit(&(dict_sys->mutex)); + + return; + } + + field = rec_get_nth_field(rec, 0, &len); + + if (!rec_get_deleted_flag(rec)) { + + /* We found one */ + + ut_memcpy(table_name, field, len); + table_name[len] = '\0'; + + btr_pcur_store_position(&pcur, &mtr); + + mtr_commit(&mtr); + + table = dict_table_get_low(table_name); + + if (table == NULL) { + fprintf(stderr, "InnoDB: Failed to load table %s\n", + table_name); + } else { + dict_update_statistics_low(table, TRUE); + + dict_table_print_low(table); + } + + mtr_start(&mtr); + + btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr); + } + + goto loop; +} + +/************************************************************************ Loads a table definition and also all its index definitions, and also -the cluster definition if the table is a member in a cluster. */ +the cluster definition if the table is a member in a cluster. 
Also loads +all foreign key constraints where the foreign key is in the table or where +a foreign key references columns in this table. Adds all these to the data +dictionary cache. */ dict_table_t* dict_load_table( @@ -59,7 +222,6 @@ dict_load_table( { dict_table_t* table; dict_table_t* sys_tables; - mtr_t mtr; btr_pcur_t pcur; dict_index_t* sys_index; dtuple_t* tuple; @@ -71,6 +233,7 @@ dict_load_table( char* buf; ulint space; ulint n_cols; + mtr_t mtr; ut_ad(mutex_own(&(dict_sys->mutex))); @@ -178,6 +341,106 @@ dict_load_table( dict_load_indexes(table, heap); + ut_a(DB_SUCCESS == dict_load_foreigns(table->name)); + + mem_heap_free(heap); + + return(table); +} + +/*************************************************************************** +Loads a table object based on the table id. */ + +dict_table_t* +dict_load_table_on_id( +/*==================*/ + /* out: table; NULL if table does not exist */ + dulint table_id) /* in: table id */ +{ + byte id_buf[8]; + btr_pcur_t pcur; + mem_heap_t* heap; + dtuple_t* tuple; + dfield_t* dfield; + dict_index_t* sys_table_ids; + dict_table_t* sys_tables; + rec_t* rec; + byte* field; + ulint len; + dict_table_t* table; + char* name; + mtr_t mtr; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + /* NOTE that the operation of this function is protected by + the dictionary mutex, and therefore no deadlocks can occur + with other dictionary operations. 
*/ + + mtr_start(&mtr); + /*---------------------------------------------------*/ + /* Get the secondary index based on ID for table SYS_TABLES */ + sys_tables = dict_sys->sys_tables; + sys_table_ids = dict_table_get_next_index( + dict_table_get_first_index(sys_tables)); + heap = mem_heap_create(256); + + tuple = dtuple_create(heap, 1); + dfield = dtuple_get_nth_field(tuple, 0); + + /* Write the table id in byte format to id_buf */ + mach_write_to_8(id_buf, table_id); + + dfield_set_data(dfield, id_buf, 8); + dict_index_copy_types(tuple, sys_table_ids, 1); + + btr_pcur_open_on_user_rec(sys_table_ids, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + rec = btr_pcur_get_rec(&pcur); + + if (!btr_pcur_is_on_user_rec(&pcur, &mtr) + || rec_get_deleted_flag(rec)) { + /* Not found */ + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap); + + return(NULL); + } + + /*---------------------------------------------------*/ + /* Now we have the record in the secondary index containing the + table ID and NAME */ + + rec = btr_pcur_get_rec(&pcur); + field = rec_get_nth_field(rec, 0, &len); + ut_ad(len == 8); + + /* Check if the table id in record is the one searched for */ + if (ut_dulint_cmp(table_id, mach_read_from_8(field)) != 0) { + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap); + + return(NULL); + } + + /* Now we get the table name from the record */ + field = rec_get_nth_field(rec, 1, &len); + + name = mem_heap_alloc(heap, len + 1); + ut_memcpy(name, field, len); + name[len] = '\0'; + + /* Load the table definition to memory */ + table = dict_load_table(name); + + ut_a(table); + + btr_pcur_close(&pcur); + mtr_commit(&mtr); mem_heap_free(heap); return(table); @@ -305,7 +568,8 @@ dict_load_columns( } /************************************************************************ -Loads definitions for table indexes. */ +Loads definitions for table indexes. Adds them to the data dictionary cache. 
+*/ static void dict_load_indexes( @@ -446,7 +710,6 @@ dict_load_fields( { dict_table_t* sys_fields; dict_index_t* sys_index; - mtr_t mtr; btr_pcur_t pcur; dtuple_t* tuple; dfield_t* dfield; @@ -456,6 +719,7 @@ dict_load_fields( ulint len; byte* buf; ulint i; + mtr_t mtr; ut_ad(mutex_own(&(dict_sys->mutex))); @@ -512,100 +776,328 @@ dict_load_fields( mtr_commit(&mtr); } +/************************************************************************ +Loads foreign key constraint col names (also for the referenced table). */ +static +void +dict_load_foreign_cols( +/*===================*/ + char* id, /* in: foreign constraint id as a null- + terminated string */ + dict_foreign_t* foreign)/* in: foreign constraint object */ +{ + dict_table_t* sys_foreign_cols; + dict_index_t* sys_index; + btr_pcur_t pcur; + dtuple_t* tuple; + dfield_t* dfield; + char* col_name; + rec_t* rec; + byte* field; + ulint len; + ulint i; + mtr_t mtr; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + foreign->foreign_col_names = mem_heap_alloc(foreign->heap, + foreign->n_fields * sizeof(void*)); + + foreign->referenced_col_names = mem_heap_alloc(foreign->heap, + foreign->n_fields * sizeof(void*)); + mtr_start(&mtr); + + sys_foreign_cols = dict_table_get_low("SYS_FOREIGN_COLS"); + sys_index = UT_LIST_GET_FIRST(sys_foreign_cols->indexes); + + tuple = dtuple_create(foreign->heap, 1); + dfield = dtuple_get_nth_field(tuple, 0); + + dfield_set_data(dfield, id, ut_strlen(id)); + dict_index_copy_types(tuple, sys_index, 1); + + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + for (i = 0; i < foreign->n_fields; i++) { + + rec = btr_pcur_get_rec(&pcur); + + ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr)); + ut_a(!rec_get_deleted_flag(rec)); + + field = rec_get_nth_field(rec, 0, &len); + ut_a(len == ut_strlen(id)); + ut_a(ut_memcmp(id, field, len) == 0); + + field = rec_get_nth_field(rec, 1, &len); + ut_a(len == 4); + ut_a(i == mach_read_from_4(field)); + + field = 
rec_get_nth_field(rec, 4, &len); + + col_name = mem_heap_alloc(foreign->heap, len + 1); + ut_memcpy(col_name, field, len); + col_name[len] = '\0'; + + foreign->foreign_col_names[i] = col_name; + + field = rec_get_nth_field(rec, 5, &len); + + col_name = mem_heap_alloc(foreign->heap, len + 1); + ut_memcpy(col_name, field, len); + col_name[len] = '\0'; + + foreign->referenced_col_names[i] = col_name; + + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + } + + btr_pcur_close(&pcur); + mtr_commit(&mtr); +} + /*************************************************************************** -Loads a table object based on the table id. */ +Loads a foreign key constraint to the dictionary cache. */ +static +ulint +dict_load_foreign( +/*==============*/ + /* out: DB_SUCCESS or error code */ + char* id) /* in: foreign constraint id as a null-terminated + string */ +{ + dict_foreign_t* foreign; + dict_table_t* sys_foreign; + btr_pcur_t pcur; + dict_index_t* sys_index; + dtuple_t* tuple; + mem_heap_t* heap2; + dfield_t* dfield; + rec_t* rec; + byte* field; + ulint len; + ulint err; + mtr_t mtr; + + ut_ad(mutex_own(&(dict_sys->mutex))); -dict_table_t* -dict_load_table_on_id( -/*==================*/ - /* out: table; NULL if table does not exist */ - dulint table_id) /* in: table id */ + heap2 = mem_heap_create(1000); + + mtr_start(&mtr); + + sys_foreign = dict_table_get_low("SYS_FOREIGN"); + sys_index = UT_LIST_GET_FIRST(sys_foreign->indexes); + + tuple = dtuple_create(heap2, 1); + dfield = dtuple_get_nth_field(tuple, 0); + + dfield_set_data(dfield, id, ut_strlen(id)); + dict_index_copy_types(tuple, sys_index, 1); + + btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + rec = btr_pcur_get_rec(&pcur); + + if (!btr_pcur_is_on_user_rec(&pcur, &mtr) + || rec_get_deleted_flag(rec)) { + /* Not found */ + + fprintf(stderr, + "InnoDB: Error A: cannot load foreign constraint %s\n", id); + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap2); + 
+ return(DB_ERROR); + } + + field = rec_get_nth_field(rec, 0, &len); + + /* Check if the id in record is the searched one */ + if (len != ut_strlen(id) || ut_memcmp(id, field, len) != 0) { + + fprintf(stderr, + "InnoDB: Error B: cannot load foreign constraint %s\n", id); + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + mem_heap_free(heap2); + + return(DB_ERROR); + } + + /* Read the table names and the number of columns associated + with the constraint */ + + mem_heap_free(heap2); + + foreign = dict_mem_foreign_create(); + + foreign->n_fields = mach_read_from_4(rec_get_nth_field(rec, 5, &len)); + + ut_a(len == 4); + + foreign->id = mem_heap_alloc(foreign->heap, ut_strlen(id) + 1); + + ut_memcpy(foreign->id, id, ut_strlen(id) + 1); + + field = rec_get_nth_field(rec, 3, &len); + + foreign->foreign_table_name = mem_heap_alloc(foreign->heap, 1 + len); + + ut_memcpy(foreign->foreign_table_name, field, len); + foreign->foreign_table_name[len] = '\0'; + + field = rec_get_nth_field(rec, 4, &len); + + foreign->referenced_table_name = mem_heap_alloc(foreign->heap, 1 + len); + + ut_memcpy(foreign->referenced_table_name, field, len); + foreign->referenced_table_name[len] = '\0'; + + btr_pcur_close(&pcur); + mtr_commit(&mtr); + + dict_load_foreign_cols(id, foreign); + + /* Note that there may already be a foreign constraint object in + the dictionary cache for this constraint: then the following + call only sets the pointers in it to point to the appropriate table + and index objects and frees the newly created object foreign. */ + + err = dict_foreign_add_to_cache(foreign); + + return(err); +} + +/*************************************************************************** +Loads foreign key constraints where the table is either the foreign key +holder or where the table is referenced by a foreign key. Adds these +constraints to the data dictionary. 
Note that we know that the dictionary +cache already contains all constraints where the other relevant table is +already in the dictionary cache. */ + +ulint +dict_load_foreigns( +/*===============*/ + /* out: DB_SUCCESS or error code */ + char* table_name) /* in: table name */ { - mtr_t mtr; - byte id_buf[8]; btr_pcur_t pcur; mem_heap_t* heap; dtuple_t* tuple; dfield_t* dfield; - dict_index_t* sys_table_ids; - dict_table_t* sys_tables; + dict_index_t* sec_index; + dict_table_t* sys_foreign; rec_t* rec; byte* field; ulint len; - dict_table_t* table; - char* name; + char* id ; + ulint err; + mtr_t mtr; ut_ad(mutex_own(&(dict_sys->mutex))); - /* NOTE that the operation of this function is protected by - the dictionary mutex, and therefore no deadlocks can occur - with other dictionary operations. */ + sys_foreign = dict_table_get_low("SYS_FOREIGN"); + + if (sys_foreign == NULL) { + /* No foreign keys defined yet in this database */ + + fprintf(stderr, + "InnoDB: Error: no foreign key system tables in the database\n"); + + return(DB_ERROR); + } mtr_start(&mtr); - /*---------------------------------------------------*/ - /* Get the secondary index based on ID for table SYS_TABLES */ - sys_tables = dict_sys->sys_tables; - sys_table_ids = dict_table_get_next_index( - dict_table_get_first_index(sys_tables)); + + /* Get the secondary index based on FOR_NAME from table + SYS_FOREIGN */ + + sec_index = dict_table_get_next_index( + dict_table_get_first_index(sys_foreign)); +start_load: heap = mem_heap_create(256); tuple = dtuple_create(heap, 1); dfield = dtuple_get_nth_field(tuple, 0); - /* Write the table id in byte format to id_buf */ - mach_write_to_8(id_buf, table_id); - - dfield_set_data(dfield, id_buf, 8); - dict_index_copy_types(tuple, sys_table_ids, 1); + dfield_set_data(dfield, table_name, ut_strlen(table_name)); + dict_index_copy_types(tuple, sec_index, 1); - btr_pcur_open_on_user_rec(sys_table_ids, tuple, PAGE_CUR_GE, + btr_pcur_open_on_user_rec(sec_index, tuple, 
PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr); +loop: rec = btr_pcur_get_rec(&pcur); - if (!btr_pcur_is_on_user_rec(&pcur, &mtr) - || rec_get_deleted_flag(rec)) { - /* Not found */ + if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) { + /* End of index */ - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap); - - return(NULL); + goto load_next_index; } - /*---------------------------------------------------*/ - /* Now we have the record in the secondary index containing the - table ID and NAME */ + /* Now we have the record in the secondary index containing a table + name and a foreign constraint ID */ rec = btr_pcur_get_rec(&pcur); field = rec_get_nth_field(rec, 0, &len); - ut_ad(len == 8); - /* Check if the table id in record is the one searched for */ - if (ut_dulint_cmp(table_id, mach_read_from_8(field)) != 0) { + /* Check if the table name in record is the one searched for */ + if (len != ut_strlen(table_name) + || 0 != ut_memcmp(field, table_name, len)) { - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap); - - return(NULL); + goto load_next_index; } - /* Now we get the table name from the record */ + if (rec_get_deleted_flag(rec)) { + + goto next_rec; + } + + /* Now we get a foreign key constraint id */ field = rec_get_nth_field(rec, 1, &len); - name = mem_heap_alloc(heap, len + 1); - ut_memcpy(name, field, len); - name[len] = '\0'; + id = mem_heap_alloc(heap, len + 1); + ut_memcpy(id, field, len); + id[len] = '\0'; - /* Load the table definition to memory */ - table = dict_load_table(name); + btr_pcur_store_position(&pcur, &mtr); - ut_a(table); + mtr_commit(&mtr); + + /* Load the foreign constraint definition to the dictionary cache */ + err = dict_load_foreign(id); + + if (err != DB_SUCCESS) { + btr_pcur_close(&pcur); + mem_heap_free(heap); + + return(err); + } + + mtr_start(&mtr); + + btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr); +next_rec: + btr_pcur_move_to_next_user_rec(&pcur, &mtr); + + goto loop; + +load_next_index: 
btr_pcur_close(&pcur); mtr_commit(&mtr); mem_heap_free(heap); + + sec_index = dict_table_get_next_index(sec_index); - return(table); + if (sec_index != NULL) { + + mtr_start(&mtr); + + goto start_load; + } + + return(DB_SUCCESS); } diff --git a/innobase/dict/dict0mem.c b/innobase/dict/dict0mem.c index 6947db11aea..57926ab9d2f 100644 --- a/innobase/dict/dict0mem.c +++ b/innobase/dict/dict0mem.c @@ -18,6 +18,7 @@ Created 1/8/1996 Heikki Tuuri #include "dict0dict.h" #include "que0que.h" #include "pars0pars.h" +#include "lock0lock.h" #define DICT_HEAP_SIZE 100 /* initial memory heap size when creating a table or index object */ @@ -63,7 +64,12 @@ dict_mem_table_create( table->cols = mem_heap_alloc(heap, (n_cols + DATA_N_SYS_COLS) * sizeof(dict_col_t)); UT_LIST_INIT(table->indexes); + + table->auto_inc_lock = mem_heap_alloc(heap, lock_get_size()); + UT_LIST_INIT(table->locks); + UT_LIST_INIT(table->foreign_list); + UT_LIST_INIT(table->referenced_list); table->does_not_fit_in_memory = FALSE; @@ -199,6 +205,8 @@ dict_mem_index_create( * sizeof(dict_field_t)); /* The '1 +' above prevents allocation of an empty mem block */ + index->stat_n_diff_key_vals = NULL; + index->cached = FALSE; index->magic_n = DICT_INDEX_MAGIC_N; @@ -206,6 +214,41 @@ dict_mem_index_create( } /************************************************************************** +Creates and initializes a foreign constraint memory object. 
*/ + +dict_foreign_t* +dict_mem_foreign_create(void) +/*=========================*/ + /* out, own: foreign constraint struct */ +{ + dict_foreign_t* foreign; + mem_heap_t* heap; + + heap = mem_heap_create(100); + + foreign = mem_heap_alloc(heap, sizeof(dict_foreign_t)); + + foreign->heap = heap; + + foreign->id = NULL; + + foreign->foreign_table_name = NULL; + foreign->foreign_table = NULL; + foreign->foreign_col_names = NULL; + + foreign->referenced_table_name = NULL; + foreign->referenced_table = NULL; + foreign->referenced_col_names = NULL; + + foreign->n_fields = 0; + + foreign->foreign_index = NULL; + foreign->referenced_index = NULL; + + return(foreign); +} + +/************************************************************************** Adds a field definition to an index. NOTE: does not take a copy of the column name if the field is a column. The memory occupied by the column name may be released only after publishing the index. */ diff --git a/innobase/fil/fil0fil.c b/innobase/fil/fil0fil.c index b386f224d11..62389c8394c 100644 --- a/innobase/fil/fil0fil.c +++ b/innobase/fil/fil0fil.c @@ -77,6 +77,9 @@ out of the LRU-list and keep a count of pending operations. When an operation completes, we decrement the count and return the file node to the LRU-list if the count drops to zero. 
*/ +ulint fil_n_pending_log_flushes = 0; +ulint fil_n_pending_tablespace_flushes = 0; + /* Null file address */ fil_addr_t fil_addr_null = {FIL_NULL, 0}; @@ -856,6 +859,15 @@ fil_node_prepare_for_io( last_node = UT_LIST_GET_LAST(system->LRU); + if (last_node == NULL) { + fprintf(stderr, + "InnoDB: Error: cannot close any file to open another for i/o\n" + "InnoDB: Pending i/o's on %lu files exist\n", + system->n_open_pending); + + ut_a(0); + } + fil_node_close(last_node, system); } @@ -973,7 +985,8 @@ fil_io( ibool ret; ulint is_log; ulint wake_later; - + ulint count; + is_log = type & OS_FILE_LOG; type = type & ~OS_FILE_LOG; @@ -996,7 +1009,7 @@ fil_io( #endif if (sync) { mode = OS_AIO_SYNC; - } else if ((type == OS_FILE_READ) && !is_log + } else if (type == OS_FILE_READ && !is_log && ibuf_page(space_id, block_offset)) { mode = OS_AIO_IBUF; } else if (is_log) { @@ -1006,9 +1019,44 @@ fil_io( } system = fil_system; + + count = 0; loop: + count++; + + /* NOTE that there is a possibility of a hang here: + if the read i/o-handler thread needs to complete + a read by reading from the insert buffer, it may need to + post another read. But if the maximum number of files + are already open, it cannot proceed from here! 
*/ + mutex_enter(&(system->mutex)); + if (count < 500 && !is_log && !ibuf_inside() + && system->n_open_pending >= (3 * system->max_n_open) / 4) { + + /* We are not doing an ibuf operation: leave a + safety margin of openable files for possible ibuf + merges needed in page read completion */ + + mutex_exit(&(system->mutex)); + + /* Wake the i/o-handler threads to make sure pending + i/o's are handled and eventually we can open the file */ + + os_aio_simulated_wake_handler_threads(); + + os_thread_sleep(100000); + + if (count > 50) { + fprintf(stderr, + "InnoDB: Warning: waiting for file closes to proceed\n" + "InnoDB: round %lu\n", count); + } + + goto loop; + } + if (system->n_open_pending == system->max_n_open) { /* It is not sure we can open the file if it is closed: wait */ @@ -1018,11 +1066,19 @@ loop: mutex_exit(&(system->mutex)); + /* Wake the i/o-handler threads to make sure pending + i/o's are handled and eventually we can open the file */ + + os_aio_simulated_wake_handler_threads(); + + fprintf(stderr, + "InnoDB: Warning: max allowed number of files is open\n"); + os_event_wait(event); goto loop; } - + HASH_SEARCH(hash, system->spaces, space_id, space, space->id == space_id); ut_a(space); @@ -1160,6 +1216,7 @@ fil_aio_wait( #elif defined(POSIX_ASYNC_IO) ret = os_aio_posix_handle(segment, &fil_node, &message); #else + ret = 0; /* Eliminate compiler warning */ ut_a(0); #endif } else { @@ -1220,6 +1277,12 @@ fil_flush( node->is_modified = FALSE; + if (space->purpose == FIL_TABLESPACE) { + fil_n_pending_tablespace_flushes++; + } else { + fil_n_pending_log_flushes++; + } + mutex_exit(&(system->mutex)); /* Note that it is not certain, when we have @@ -1233,6 +1296,12 @@ fil_flush( os_file_flush(file); mutex_enter(&(system->mutex)); + + if (space->purpose == FIL_TABLESPACE) { + fil_n_pending_tablespace_flushes--; + } else { + fil_n_pending_log_flushes--; + } } node = UT_LIST_GET_NEXT(chain, node); @@ -1377,7 +1446,7 @@ fil_page_set_type( ulint type) /* in: type 
*/ { ut_ad(page); - ut_ad((type == FIL_PAGE_INDEX) || (type == FIL_PAGE_INDEX)); + ut_ad((type == FIL_PAGE_INDEX) || (type == FIL_PAGE_UNDO_LOG)); mach_write_to_2(page + FIL_PAGE_TYPE, type); } diff --git a/innobase/ibuf/ibuf0ibuf.c b/innobase/ibuf/ibuf0ibuf.c index fa1c630dc08..d289b176efa 100644 --- a/innobase/ibuf/ibuf0ibuf.c +++ b/innobase/ibuf/ibuf0ibuf.c @@ -1013,7 +1013,7 @@ ibuf_rec_get_volume( ulint i; ut_ad(ibuf_inside()); - ut_ad(rec_get_n_fields(rec) > 2); + ut_ad(rec_get_n_fields(ibuf_rec) > 2); n_fields = rec_get_n_fields(ibuf_rec) - 2; @@ -1624,13 +1624,14 @@ ibuf_get_merge_page_nos( /************************************************************************* Contracts insert buffer trees by reading pages to the buffer pool. */ - +static ulint -ibuf_contract( -/*==========*/ +ibuf_contract_ext( +/*==============*/ /* out: a lower limit for the combined size in bytes of entries which will be merged from ibuf trees to the pages read, 0 if ibuf is empty */ + ulint* n_pages,/* out: number of pages to which merged */ ibool sync) /* in: TRUE if the caller wants to wait for the issued read with the highest tablespace address to complete */ @@ -1644,6 +1645,8 @@ ibuf_contract( ulint n_stored; ulint sum_sizes; mtr_t mtr; + + *n_pages = 0; loop: ut_ad(!ibuf_inside()); @@ -1730,10 +1733,65 @@ loop: buf_read_ibuf_merge_pages(sync, space, page_nos, n_stored); + *n_pages = n_stored; + return(sum_sizes + 1); } /************************************************************************* +Contracts insert buffer trees by reading pages to the buffer pool. 
*/ + +ulint +ibuf_contract( +/*==========*/ + /* out: a lower limit for the combined size in bytes + of entries which will be merged from ibuf trees to the + pages read, 0 if ibuf is empty */ + ibool sync) /* in: TRUE if the caller wants to wait for the + issued read with the highest tablespace address + to complete */ +{ + ulint n_pages; + + return(ibuf_contract_ext(&n_pages, sync)); +} + +/************************************************************************* +Contracts insert buffer trees by reading pages to the buffer pool. */ + +ulint +ibuf_contract_for_n_pages( +/*======================*/ + /* out: a lower limit for the combined size in bytes + of entries which will be merged from ibuf trees to the + pages read, 0 if ibuf is empty */ + ibool sync, /* in: TRUE if the caller wants to wait for the + issued read with the highest tablespace address + to complete */ + ulint n_pages)/* in: try to read at least this many pages to + the buffer pool and merge the ibuf contents to + them */ +{ + ulint sum_bytes = 0; + ulint sum_pages = 0; + ulint n_bytes; + ulint n_pag2; + + while (sum_pages < n_pages) { + n_bytes = ibuf_contract_ext(&n_pag2, sync); + + if (n_bytes == 0) { + return(sum_bytes); + } + + sum_bytes += n_bytes; + sum_pages += n_pag2; + } + + return(sum_bytes); +} + +/************************************************************************* Contract insert buffer trees after insert if they are too big. 
*/ UNIV_INLINE void @@ -2252,8 +2310,6 @@ ibuf_insert_to_index_page( if (low_match == dtuple_get_n_fields(entry)) { rec = page_cur_get_rec(&page_cur); - - ut_ad(rec_get_deleted_flag(rec)); btr_cur_del_unmark_for_ibuf(rec, mtr); } else { @@ -2306,6 +2362,8 @@ ibuf_delete_rec( should belong */ btr_pcur_t* pcur, /* in: pcur positioned on the record to delete, having latch mode BTR_MODIFY_LEAF */ + dtuple_t* search_tuple, + /* in: search tuple for entries of page_no */ mtr_t* mtr) /* in: mtr */ { ibool success; @@ -2336,12 +2394,33 @@ ibuf_delete_rec( mtr_start(mtr); - ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr)); + success = btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr); + + if (!success) { + fprintf(stderr, + "InnoDB: ERROR: Send the output to heikki.tuuri@innodb.com\n"); + fprintf(stderr, "InnoDB: ibuf cursor restoration fails!\n"); + fprintf(stderr, "InnoDB: ibuf record inserted to page %lu\n", + page_no); + rec_print(btr_pcur_get_rec(pcur)); + rec_print(pcur->old_rec); + dtuple_print(search_tuple); + + rec_print(page_rec_get_next(btr_pcur_get_rec(pcur))); + + mtr_commit(mtr); + + fprintf(stderr, "InnoDB: Validating insert buffer tree:\n"); + ut_a(btr_validate_tree(ibuf_data->index->tree)); + fprintf(stderr, "InnoDB: Ibuf tree ok\n"); + } + + ut_a(success); root = ibuf_tree_root_get(ibuf_data, space, mtr); btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur), - FALSE, mtr); + FALSE, mtr); ut_a(err == DB_SUCCESS); #ifdef UNIV_IBUF_DEBUG @@ -2393,8 +2472,11 @@ ibuf_merge_or_delete_for_page( dulint max_trx_id; mtr_t mtr; - /* TODO: get MySQL type info to use in ibuf_insert_to_index_page */ + if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) { + return; + } + #ifdef UNIV_LOG_DEBUG if (space % 2 != 0) { @@ -2451,16 +2533,13 @@ loop: if (page) { success = buf_page_get_known_nowait(RW_X_LATCH, page, BUF_KEEP_OLD, -#ifdef UNIV_SYNC_DEBUG IB__FILE__, __LINE__, -#endif &mtr); - ut_a(success); buf_page_dbg_add_level(page, SYNC_TREE_NODE); 
} - + /* Position pcur in the insert buffer at the first entry for this index page */ btr_pcur_open_on_user_rec(ibuf_data->index, search_tuple, PAGE_CUR_GE, @@ -2476,7 +2555,7 @@ loop: ut_ad(btr_pcur_is_on_user_rec(&pcur, &mtr)); ibuf_rec = btr_pcur_get_rec(&pcur); - + /* Check if the entry is for this index page */ if (ibuf_rec_get_page_no(ibuf_rec) != page_no) { @@ -2508,13 +2587,13 @@ loop: / IBUF_PAGE_SIZE_PER_FREE_SPACE); #endif ibuf_insert_to_index_page(entry, page, &mtr); - - n_inserts++; } + + n_inserts++; /* Delete the record from ibuf */ - closed = ibuf_delete_rec(space, page_no, &pcur, &mtr); - + closed = ibuf_delete_rec(space, page_no, &pcur, search_tuple, + &mtr); if (closed) { /* Deletion was pessimistic and mtr was committed: we start from the beginning again */ @@ -2524,6 +2603,7 @@ loop: if (btr_pcur_is_after_last_on_page(&pcur, &mtr)) { mtr_commit(&mtr); + btr_pcur_close(&pcur); goto loop; } @@ -2619,8 +2699,6 @@ ibuf_print(void) #endif mutex_enter(&ibuf_mutex); - printf("Ibuf size %lu max size %lu\n", ibuf->size, ibuf->max_size); - data = UT_LIST_GET_FIRST(ibuf->data_list); while (data) { diff --git a/innobase/include/btr0cur.h b/innobase/include/btr0cur.h index f56a5662253..bce1f0685cc 100644 --- a/innobase/include/btr0cur.h +++ b/innobase/include/btr0cur.h @@ -188,6 +188,22 @@ btr_cur_pessimistic_insert( que_thr_t* thr, /* in: query thread or NULL */ mtr_t* mtr); /* in: mtr */ /***************************************************************** +Updates a secondary index record when the update causes no size +changes in its fields. The only case when this function is currently +called is that in a char field characters change to others which +are identified in the collation order. 
*/ + +ulint +btr_cur_update_sec_rec_in_place( +/*============================*/ + /* out: DB_SUCCESS or error number */ + btr_cur_t* cursor, /* in: cursor on the record to update; + cursor stays valid and positioned on the + same record */ + upd_t* update, /* in: update vector */ + que_thr_t* thr, /* in: query thread */ + mtr_t* mtr); /* in: mtr */ +/***************************************************************** Updates a record when the update causes no size changes in its fields. */ ulint @@ -411,12 +427,13 @@ btr_estimate_n_rows_in_range( dtuple_t* tuple2, /* in: range end, may also be empty tuple */ ulint mode2); /* in: search mode for range end */ /*********************************************************************** -Estimates the number of different key values in a given index. */ +Estimates the number of different key values in a given index, for +each n-column prefix of the index where n <= dict_index_get_n_unique(index). +The estimates are stored in the array index->stat_n_diff_key_vals. */ -ulint +void btr_estimate_number_of_different_key_vals( /*======================================*/ - /* out: estimated number of key values */ dict_index_t* index); /* in: index */ /*********************************************************************** Marks not updated extern fields as not-owned by this record. 
The ownership diff --git a/innobase/include/btr0pcur.h b/innobase/include/btr0pcur.h index 6465093e3c1..05b55e4491d 100644 --- a/innobase/include/btr0pcur.h +++ b/innobase/include/btr0pcur.h @@ -19,9 +19,15 @@ Created 2/23/1996 Heikki Tuuri #include "btr0types.h" /* Relative positions for a stored cursor position */ -#define BTR_PCUR_ON 1 -#define BTR_PCUR_BEFORE 2 -#define BTR_PCUR_AFTER 3 +#define BTR_PCUR_ON 1 +#define BTR_PCUR_BEFORE 2 +#define BTR_PCUR_AFTER 3 +/* Note that if the tree is not empty, btr_pcur_store_position does not +use the following, but only uses the above three alternatives, where the +position is stored relative to a specific record: this makes implementation +of a scroll cursor easier */ +#define BTR_PCUR_BEFORE_FIRST_IN_TREE 4 /* in an empty tree */ +#define BTR_PCUR_AFTER_LAST_IN_TREE 5 /* in an empty tree */ /****************************************************************** Allocates memory for a persistent cursor object and initializes the cursor. */ @@ -170,34 +176,16 @@ btr_pcur_close( /****************************************************************** The position of the cursor is stored by taking an initial segment of the record the cursor is positioned on, before, or after, and copying it to the -cursor data structure. NOTE that the page where the cursor is positioned -must not be empty! */ +cursor data structure, or just setting a flag if the cursor id before the +first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the +page where the cursor is positioned must not be empty if the index tree is +not totally empty! */ void btr_pcur_store_position( /*====================*/ - btr_pcur_t* cursor, /* in: persistent cursor */ - mtr_t* mtr); /* in: mtr */ -/****************************************************************** -If the latch mode of the cursor is BTR_LEAF_SEARCH or BTR_LEAF_MODIFY, -releases the page latch and bufferfix reserved by the cursor. -NOTE! 
In the case of BTR_LEAF_MODIFY, there should not exist changes -made by the current mini-transaction to the data protected by the -cursor latch, as then the latch must not be released until mtr_commit. */ - -void -btr_pcur_release_leaf( -/*==================*/ btr_pcur_t* cursor, /* in: persistent cursor */ mtr_t* mtr); /* in: mtr */ -/************************************************************* -Gets the rel_pos field for a cursor whose position has been stored. */ -UNIV_INLINE -ulint -btr_pcur_get_rel_pos( -/*=================*/ - /* out: BTR_PCUR_ON, ... */ - btr_pcur_t* cursor);/* in: persistent cursor */ /****************************************************************** Restores the stored position of a persistent cursor bufferfixing the page and obtaining the specified latches. If the cursor position was saved when the @@ -207,7 +195,9 @@ to the last record LESS OR EQUAL to the stored record; the last record LESS than the user record which was the successor of the page infimum; (3) cursor was positioned on the page supremum: restores to the first record -GREATER than the user record which was the predecessor of the supremum. */ +GREATER than the user record which was the predecessor of the supremum. +(4) cursor was positioned before the first or after the last in an empty tree: +restores to before first or after the last in the tree. */ ibool btr_pcur_restore_position( @@ -220,6 +210,26 @@ btr_pcur_restore_position( ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */ btr_pcur_t* cursor, /* in: detached persistent cursor */ mtr_t* mtr); /* in: mtr */ +/****************************************************************** +If the latch mode of the cursor is BTR_LEAF_SEARCH or BTR_LEAF_MODIFY, +releases the page latch and bufferfix reserved by the cursor. +NOTE! In the case of BTR_LEAF_MODIFY, there should not exist changes +made by the current mini-transaction to the data protected by the +cursor latch, as then the latch must not be released until mtr_commit. 
*/ + +void +btr_pcur_release_leaf( +/*==================*/ + btr_pcur_t* cursor, /* in: persistent cursor */ + mtr_t* mtr); /* in: mtr */ +/************************************************************* +Gets the rel_pos field for a cursor whose position has been stored. */ +UNIV_INLINE +ulint +btr_pcur_get_rel_pos( +/*=================*/ + /* out: BTR_PCUR_ON, ... */ + btr_pcur_t* cursor);/* in: persistent cursor */ /************************************************************* Sets the mtr field for a pcur. */ UNIV_INLINE @@ -458,7 +468,7 @@ struct btr_pcur_struct{ ulint search_mode; /* PAGE_CUR_G, ... */ /*-----------------------------*/ /* NOTE that the following fields may possess dynamically allocated - memory, which should be freed if not needed anymore! */ + memory which should be freed if not needed anymore! */ mtr_t* mtr; /* NULL, or this field may contain a mini-transaction which holds the diff --git a/innobase/include/btr0pcur.ic b/innobase/include/btr0pcur.ic index 8e927689208..a60140e4aa9 100644 --- a/innobase/include/btr0pcur.ic +++ b/innobase/include/btr0pcur.ic @@ -19,8 +19,8 @@ btr_pcur_get_rel_pos( ut_ad(cursor); ut_ad(cursor->old_rec); ut_ad(cursor->old_stored == BTR_PCUR_OLD_STORED); - ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED) - || (cursor->pos_state == BTR_PCUR_IS_POSITIONED)); + ut_ad(cursor->pos_state == BTR_PCUR_WAS_POSITIONED + || cursor->pos_state == BTR_PCUR_IS_POSITIONED); return(cursor->rel_pos); } diff --git a/innobase/include/btr0sea.h b/innobase/include/btr0sea.h index c319e16d740..fdf5cf375a3 100644 --- a/innobase/include/btr0sea.h +++ b/innobase/include/btr0sea.h @@ -262,6 +262,12 @@ index */ #define BTR_SEARCH_ON_HASH_LIMIT 3 +/* We do this many searches before trying to keep the search latch over calls +from MySQL. If we notice someone waiting for the latch, we again set this +much timeout. This is to reduce contention. 
*/ + +#define BTR_SEA_TIMEOUT 10000 + #ifndef UNIV_NONINL #include "btr0sea.ic" #endif diff --git a/innobase/include/buf0buf.h b/innobase/include/buf0buf.h index 66071030402..5ddbf39335a 100644 --- a/innobase/include/buf0buf.h +++ b/innobase/include/buf0buf.h @@ -116,53 +116,30 @@ buf_frame_copy( NOTE! The following macros should be used instead of buf_page_get_gen, to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed in LA! */ -#ifdef UNIV_SYNC_DEBUG #define buf_page_get(SP, OF, LA, MTR) buf_page_get_gen(\ SP, OF, LA, NULL,\ BUF_GET, IB__FILE__, __LINE__, MTR) -#else -#define buf_page_get(SP, OF, LA, MTR) buf_page_get_gen(\ - SP, OF, LA, NULL,\ - BUF_GET, MTR) -#endif /****************************************************************** Use these macros to bufferfix a page with no latching. Remember not to read the contents of the page unless you know it is safe. Do not modify the contents of the page! We have separated this case, because it is error-prone programming not to set a latch, and it should be used with care. */ -#ifdef UNIV_SYNC_DEBUG #define buf_page_get_with_no_latch(SP, OF, MTR) buf_page_get_gen(\ SP, OF, RW_NO_LATCH, NULL,\ BUF_GET_NO_LATCH, IB__FILE__, __LINE__, MTR) -#else -#define buf_page_get_with_no_latch(SP, OF, MTR) buf_page_get_gen(\ - SP, OF, RW_NO_LATCH, NULL,\ - BUF_GET_NO_LATCH, MTR) -#endif /****************************************************************** NOTE! The following macros should be used instead of buf_page_get_gen, to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed as LA! */ -#ifdef UNIV_SYNC_DEBUG #define buf_page_get_nowait(SP, OF, LA, MTR) buf_page_get_gen(\ SP, OF, LA, NULL,\ BUF_GET_NOWAIT, IB__FILE__, __LINE__, MTR) -#else -#define buf_page_get_nowait(SP, OF, LA, MTR) buf_page_get_gen(\ - SP, OF, LA, NULL,\ - BUF_GET_NOWAIT, MTR) -#endif /****************************************************************** NOTE! 
The following macros should be used instead of buf_page_optimistic_get_func, to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed as LA! */ -#ifdef UNIV_SYNC_DEBUG #define buf_page_optimistic_get(LA, G, MC, MTR) buf_page_optimistic_get_func(\ LA, G, MC, IB__FILE__, __LINE__, MTR) -#else -#define buf_page_optimistic_get(LA, G, MC, MTR) buf_page_optimistic_get_func(\ - LA, G, MC, MTR) -#endif /************************************************************************ This is the general function used to get optimistic access to a database page. */ @@ -175,10 +152,8 @@ buf_page_optimistic_get_func( buf_frame_t* guess, /* in: guessed frame */ dulint modify_clock,/* in: modify clock value if mode is ..._GUESS_ON_CLOCK */ -#ifdef UNIV_SYNC_DEBUG char* file, /* in: file name */ ulint line, /* in: line where called */ -#endif mtr_t* mtr); /* in: mini-transaction */ /************************************************************************ Tries to get the page, but if file io is required, releases all latches @@ -210,10 +185,8 @@ buf_page_get_known_nowait( ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */ buf_frame_t* guess, /* in: the known page frame */ ulint mode, /* in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */ -#ifdef UNIV_SYNC_DEBUG char* file, /* in: file name */ ulint line, /* in: line where called */ -#endif mtr_t* mtr); /* in: mini-transaction */ /************************************************************************ This is the general function used to get access to a database page. */ @@ -228,10 +201,8 @@ buf_page_get_gen( buf_frame_t* guess, /* in: guessed frame or NULL */ ulint mode, /* in: BUF_GET, BUF_GET_IF_IN_POOL, BUF_GET_NO_LATCH */ -#ifdef UNIV_SYNC_DEBUG char* file, /* in: file name */ ulint line, /* in: line where called */ -#endif mtr_t* mtr); /* in: mini-transaction */ /************************************************************************ Initializes a page to the buffer buf_pool. 
The page is usually not read @@ -455,6 +426,13 @@ Validates the buffer pool data structure. */ ibool buf_validate(void); /*==============*/ +/************************************************************************ +Prints a page to stderr. */ + +void +buf_page_print( +/*===========*/ + byte* read_buf); /* in: a database page */ /************************************************************************* Prints info of the buffer pool data structure. */ @@ -462,6 +440,12 @@ void buf_print(void); /*===========*/ /************************************************************************* +Returns the number of pending buf pool ios. */ + +ulint +buf_get_n_pending_ios(void); +/*=======================*/ +/************************************************************************* Prints info of the buffer i/o. */ void @@ -760,6 +744,8 @@ struct buf_pool_struct{ byte* frame_zero; /* pointer to the first buffer frame: this may differ from frame_mem, because this is aligned by the frame size */ + byte* high_end; /* pointer to the end of the + buffer pool */ buf_block_t* blocks; /* array of buffer control blocks */ ulint max_size; /* number of control blocks == maximum pool size in pages */ @@ -767,6 +753,9 @@ struct buf_pool_struct{ hash_table_t* page_hash; /* hash table of the file pages */ ulint n_pend_reads; /* number of pending read operations */ + + time_t last_printout_time; /* when buf_print was last time + called */ ulint n_pages_read; /* number read operations */ ulint n_pages_written;/* number write operations */ ulint n_pages_created;/* number of pages created in the pool @@ -782,6 +771,9 @@ struct buf_pool_struct{ hit rate */ ulint n_pages_read_old;/* n_pages_read when buf_print was last time called */ + ulint n_pages_written_old;/* number write operations */ + ulint n_pages_created_old;/* number of pages created in + the pool with no read */ /* 2. 
Page flushing algorithm fields */ UT_LIST_BASE_NODE_T(buf_block_t) flush_list; diff --git a/innobase/include/buf0buf.ic b/innobase/include/buf0buf.ic index 1ecc6f34a78..3d88d087e63 100644 --- a/innobase/include/buf0buf.ic +++ b/innobase/include/buf0buf.ic @@ -486,11 +486,7 @@ buf_block_buf_fix_inc_debug( { ibool ret; - ret = rw_lock_s_lock_func_nowait(&(block->debug_latch) -#ifdef UNIV_SYNC_DEBUG - ,file, line -#endif - ); + ret = rw_lock_s_lock_func_nowait(&(block->debug_latch), file, line); ut_ad(ret == TRUE); @@ -557,9 +553,7 @@ buf_page_get_release_on_io( frame = buf_page_get_gen(space, offset, rw_latch, guess, BUF_GET_IF_IN_POOL, -#ifdef UNIV_SYNC_DEBUG IB__FILE__, __LINE__, -#endif mtr); if (frame != NULL) { diff --git a/innobase/include/data0data.h b/innobase/include/data0data.h index c314281d758..c19d7ea5552 100644 --- a/innobase/include/data0data.h +++ b/innobase/include/data0data.h @@ -116,8 +116,8 @@ dfield_copy( Tests if data length and content is equal for two dfields. */ UNIV_INLINE ibool -dfield_datas_are_equal( -/*===================*/ +dfield_datas_are_binary_equal( +/*==========================*/ /* out: TRUE if equal */ dfield_t* field1, /* in: field */ dfield_t* field2);/* in: field */ @@ -125,8 +125,8 @@ dfield_datas_are_equal( Tests if dfield data length and content is equal to the given. */ UNIV_INLINE ibool -dfield_data_is_equal( -/*=================*/ +dfield_data_is_binary_equal( +/*========================*/ /* out: TRUE if equal */ dfield_t* field, /* in: field */ ulint len, /* in: data length or UNIV_SQL_NULL */ @@ -230,14 +230,18 @@ dtuple_get_data_size( dtuple_t* tuple); /* in: typed data tuple */ /**************************************************************** Returns TRUE if lengths of two dtuples are equal and respective data fields -in them are equal. */ -UNIV_INLINE +in them are equal when compared with collation in char fields (not as binary +strings). 
*/ + ibool -dtuple_datas_are_equal( -/*===================*/ - /* out: TRUE if length and datas are equal */ +dtuple_datas_are_ordering_equal( +/*============================*/ + /* out: TRUE if length and fieds are equal + when compared with cmp_data_data: + NOTE: in character type fields some letters + are identified with others! (collation) */ dtuple_t* tuple1, /* in: tuple 1 */ - dtuple_t* tuple2); /* in: tuple 2 */ + dtuple_t* tuple2);/* in: tuple 2 */ /**************************************************************** Folds a prefix given as the number of fields of a tuple. */ UNIV_INLINE @@ -447,7 +451,7 @@ struct dfield_struct{ struct dtuple_struct { ulint info_bits; /* info bits of an index record: - default is 0; this field is used + the default is 0; this field is used if an index record is built from a data tuple */ ulint n_fields; /* number of fields in dtuple */ diff --git a/innobase/include/data0data.ic b/innobase/include/data0data.ic index b886ad6c69c..0750a3894d1 100644 --- a/innobase/include/data0data.ic +++ b/innobase/include/data0data.ic @@ -133,8 +133,8 @@ dfield_copy( Tests if data length and content is equal for two dfields. */ UNIV_INLINE ibool -dfield_datas_are_equal( -/*===================*/ +dfield_datas_are_binary_equal( +/*==========================*/ /* out: TRUE if equal */ dfield_t* field1, /* in: field */ dfield_t* field2) /* in: field */ @@ -157,8 +157,8 @@ dfield_datas_are_equal( Tests if dfield data length and content is equal to the given. 
*/ UNIV_INLINE ibool -dfield_data_is_equal( -/*=================*/ +dfield_data_is_binary_equal( +/*========================*/ /* out: TRUE if equal */ dfield_t* field, /* in: field */ ulint len, /* in: data length or UNIV_SQL_NULL */ @@ -169,8 +169,7 @@ dfield_data_is_equal( return(FALSE); } - if ((len != UNIV_SQL_NULL) - && (0 != ut_memcmp(field->data, data, len))) { + if (len != UNIV_SQL_NULL && 0 != ut_memcmp(field->data, data, len)) { return(FALSE); } @@ -342,65 +341,6 @@ dtuple_get_data_size( return(sum); } -/**************************************************************** -Returns TRUE if lengths of two dtuples are equal and respective data fields -in them are equal. */ -UNIV_INLINE -ibool -dtuple_datas_are_equal( -/*===================*/ - /* out: TRUE if length and datas are equal */ - dtuple_t* tuple1, /* in: tuple 1 */ - dtuple_t* tuple2) /* in: tuple 2 */ -{ - dfield_t* field1; - dfield_t* field2; - ulint n_fields; - byte* data1; - byte* data2; - ulint len1; - ulint len2; - ulint i; - - ut_ad(tuple1 && tuple2); - ut_ad(tuple1->magic_n = DATA_TUPLE_MAGIC_N); - ut_ad(tuple2->magic_n = DATA_TUPLE_MAGIC_N); - ut_ad(dtuple_check_typed(tuple1)); - ut_ad(dtuple_check_typed(tuple2)); - - n_fields = dtuple_get_n_fields(tuple1); - - if (n_fields != dtuple_get_n_fields(tuple2)) { - - return(FALSE); - } - - for (i = 0; i < n_fields; i++) { - - field1 = dtuple_get_nth_field(tuple1, i); - data1 = (byte*) dfield_get_data(field1); - len1 = dfield_get_len(field1); - - field2 = dtuple_get_nth_field(tuple2, i); - data2 = (byte*) dfield_get_data(field2); - len2 = dfield_get_len(field2); - - if (len1 != len2) { - - return(FALSE); - } - - if (len1 != UNIV_SQL_NULL) { - if (ut_memcmp(data1, data2, len1) != 0) { - - return(FALSE); - } - } - } - - return(TRUE); -} - /*********************************************************************** Sets types of fields binary in a tuple. 
*/ UNIV_INLINE diff --git a/innobase/include/data0type.h b/innobase/include/data0type.h index 4817f0ca839..b53a70a8909 100644 --- a/innobase/include/data0type.h +++ b/innobase/include/data0type.h @@ -124,17 +124,6 @@ dtype_get_pad_char( /* out: padding character code, or ULINT_UNDEFINED if no padding specified */ dtype_t* type); /* in: typeumn */ -/************************************************************************* -Transforms the character code so that it is ordered appropriately -for the language. */ -UNIV_INLINE -ulint -dtype_collate( -/*==========*/ - /* out: padding character */ - dtype_t* type, /* in: type */ - ulint code); /* in: character code stored in database - record */ /*************************************************************************** Returns the size of a fixed size data type, 0 if not a fixed size type. */ UNIV_INLINE diff --git a/innobase/include/data0type.ic b/innobase/include/data0type.ic index f6bdaf69662..4a62902eb1b 100644 --- a/innobase/include/data0type.ic +++ b/innobase/include/data0type.ic @@ -120,23 +120,6 @@ dtype_get_pad_char( return(ULINT_UNDEFINED); } -/************************************************************************* -Transforms the character code so that it is ordered appropriately for the -language. */ -UNIV_INLINE -ulint -dtype_collate( -/*==========*/ - /* out: collation order position */ - dtype_t* type, /* in: type */ - ulint code) /* in: character code stored in database - record */ -{ - ut_ad((type->mtype == DATA_CHAR) || (type->mtype == DATA_VARCHAR)); - - return(toupper(code)); -} - /************************************************************************** Stores to a type the information which determines its alphabetical ordering. 
*/ @@ -198,6 +181,10 @@ dtype_get_fixed_size( case DATA_SYS: if (type->prtype == DATA_ROW_ID) { return(DATA_ROW_ID_LEN); + } else if (type->prtype == DATA_TRX_ID) { + return(DATA_TRX_ID_LEN); + } else if (type->prtype == DATA_ROLL_PTR) { + return(DATA_ROLL_PTR_LEN); } else { return(0); } diff --git a/innobase/include/db0err.h b/innobase/include/db0err.h index 34513545faa..ddfbd5b7862 100644 --- a/innobase/include/db0err.h +++ b/innobase/include/db0err.h @@ -27,12 +27,21 @@ Created 5/24/1996 Heikki Tuuri #define DB_CLUSTER_NOT_FOUND 30 #define DB_TABLE_NOT_FOUND 31 #define DB_MUST_GET_MORE_FILE_SPACE 32 /* the database has to be stopped - and restrated with more file space */ + and restarted with more file space */ #define DB_TABLE_IS_BEING_USED 33 #define DB_TOO_BIG_RECORD 34 /* a record in an index would become bigger than 1/2 free space in a page frame */ - +#define DB_LOCK_WAIT_TIMEOUT 35 /* lock wait lasted too long */ +#define DB_NO_REFERENCED_ROW 36 /* referenced key value not found + for a foreign key in an insert or + update of a row */ +#define DB_ROW_IS_REFERENCED 37 /* cannot delete or update a row + because it contains a key value + which is referenced */ +#define DB_CANNOT_ADD_CONSTRAINT 38 /* adding a foreign key constraint + to a table failed */ + /* The following are partial failure codes */ #define DB_FAIL 1000 #define DB_OVERFLOW 1001 diff --git a/innobase/include/dict0crea.h b/innobase/include/dict0crea.h index 6bc31e1e722..ccdedff42c8 100644 --- a/innobase/include/dict0crea.h +++ b/innobase/include/dict0crea.h @@ -71,6 +71,24 @@ dict_drop_index_tree( rec_t* rec, /* in: record in the clustered index of SYS_INDEXES table */ mtr_t* mtr); /* in: mtr having the latch on the record page */ +/******************************************************************** +Creates the foreign key constraints system tables inside InnoDB +at database creation or database start if they are not found or are +not of the right form. 
*/ + +ulint +dict_create_or_check_foreign_constraint_tables(void); +/*================================================*/ + /* out: DB_SUCCESS or error code */ +/************************************************************************ +Adds foreign key definitions to data dictionary tables in the database. */ + +ulint +dict_create_add_foreigns_to_dictionary( +/*===================================*/ + /* out: error code or DB_SUCCESS */ + dict_table_t* table, /* in: table */ + trx_t* trx); /* in: transaction */ /* Table create node structure */ diff --git a/innobase/include/dict0dict.h b/innobase/include/dict0dict.h index cec1430c9e9..56b55b8a417 100644 --- a/innobase/include/dict0dict.h +++ b/innobase/include/dict0dict.h @@ -138,6 +138,38 @@ dict_table_rename_in_cache( dict_table_t* table, /* in: table */ char* new_name); /* in: new name */ /************************************************************************** +Adds a foreign key constraint object to the dictionary cache. May free +the object if there already is an object with the same identifier in. +At least one of foreign table or referenced table must already be in +the dictionary cache! */ + +ulint +dict_foreign_add_to_cache( +/*======================*/ + /* out: DB_SUCCESS or error code */ + dict_foreign_t* foreign); /* in, own: foreign key constraint */ +/************************************************************************* +Scans a table create SQL string and adds to the data dictionary +the foreign key constraints declared in the string. This function +should be called after the indexes for a table have been created. +Each foreign key constraint must be accompanied with indexes in +bot participating tables. The indexes are allowed to contain more +fields than mentioned in the constraint. 
*/ + +ulint +dict_create_foreign_constraints( +/*============================*/ + /* out: error code or DB_SUCCESS */ + trx_t* trx, /* in: transaction */ + char* sql_string, /* in: table create statement where + foreign keys are declared like: + FOREIGN KEY (a, b) REFERENCES table2(c, d), + table2 can be written also with the database + name before it: test.table2; the default + database id the database of parameter name */ + char* name); /* in: table full name in the normalized form + database_name/table_name */ +/************************************************************************** Returns a table object and memoryfixes it. NOTE! This is a high-level function to be used mainly from outside the 'dict' directory. Inside this directory dict_table_get_low is usually the appropriate function. */ @@ -174,6 +206,14 @@ dict_table_release( /*===============*/ dict_table_t* table); /* in: table to be released */ /************************************************************************** +Checks if a table is in the dictionary cache. */ +UNIV_INLINE +dict_table_t* +dict_table_check_if_in_cache_low( +/*==============================*/ + /* out: table, NULL if not found */ + char* table_name); /* in: table name */ +/************************************************************************** Gets a table; loads it to the dictionary cache if necessary. A low-level function. */ UNIV_INLINE @@ -208,6 +248,13 @@ dict_table_print( /*=============*/ dict_table_t* table); /* in: table */ /************************************************************************** +Prints a table data. */ + +void +dict_table_print_low( +/*=================*/ + dict_table_t* table); /* in: table */ +/************************************************************************** Prints a table data when we know the table name. 
*/ void @@ -319,6 +366,16 @@ dict_table_copy_types( dtuple_t* tuple, /* in: data tuple */ dict_table_t* table); /* in: index */ /************************************************************************** +Looks for an index with the given id. NOTE that we do not reserve +the dictionary mutex: this function is for emergency purposes like +printing info of a corrupt database page! */ + +dict_index_t* +dict_index_find_on_id_low( +/*======================*/ + /* out: index or NULL if not found from cache */ + dulint id); /* in: index id */ +/************************************************************************** Adds an index to dictionary cache. */ ibool @@ -640,6 +697,23 @@ dict_tree_get_space_reserve( reserved for updates */ dict_tree_t* tree); /* in: a tree */ /************************************************************************* +Calculates the minimum record length in an index. */ + +ulint +dict_index_calc_min_rec_len( +/*========================*/ + dict_index_t* index); /* in: index */ +/************************************************************************* +Calculates new estimates for table and index statistics. The statistics +are used in query optimization. */ + +void +dict_update_statistics_low( +/*=======================*/ + dict_table_t* table, /* in: table */ + ibool has_dict_mutex);/* in: TRUE if the caller has the + dictionary mutex */ +/************************************************************************* Calculates new estimates for table and index statistics. The statistics are used in query optimization. 
*/ @@ -661,7 +735,8 @@ dict_mutex_exit_for_mysql(void); /*===========================*/ -extern dict_sys_t* dict_sys; /* the dictionary system */ +extern dict_sys_t* dict_sys; /* the dictionary system */ +extern rw_lock_t dict_foreign_key_check_lock; /* Dictionary system struct */ struct dict_sys_struct{ diff --git a/innobase/include/dict0dict.ic b/innobase/include/dict0dict.ic index 9089ebe8edd..821465f96a8 100644 --- a/innobase/include/dict0dict.ic +++ b/innobase/include/dict0dict.ic @@ -532,12 +532,11 @@ dict_tree_get_space_reserve( } /************************************************************************** -Gets a table; loads it to the dictionary cache if necessary. A low-level -function. */ +Checks if a table is in the dictionary cache. */ UNIV_INLINE dict_table_t* -dict_table_get_low( -/*===============*/ +dict_table_check_if_in_cache_low( +/*==============================*/ /* out: table, NULL if not found */ char* table_name) /* in: table name */ { @@ -552,6 +551,26 @@ dict_table_get_low( HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold, table, ut_strcmp(table->name, table_name) == 0); + return(table); +} + +/************************************************************************** +Gets a table; loads it to the dictionary cache if necessary. A low-level +function. 
*/ +UNIV_INLINE +dict_table_t* +dict_table_get_low( +/*===============*/ + /* out: table, NULL if not found */ + char* table_name) /* in: table name */ +{ + dict_table_t* table; + + ut_ad(table_name); + ut_ad(mutex_own(&(dict_sys->mutex))); + + table = dict_table_check_if_in_cache_low(table_name); + if (table == NULL) { table = dict_load_table(table_name); } @@ -603,6 +622,7 @@ dict_table_get_on_id_low( dict_table_t* table; ulint fold; + ut_ad(mutex_own(&(dict_sys->mutex))); UT_NOT_USED(trx); /* Look for the table name in the hash table */ diff --git a/innobase/include/dict0load.h b/innobase/include/dict0load.h index d0298d8df37..b60996a8dab 100644 --- a/innobase/include/dict0load.h +++ b/innobase/include/dict0load.h @@ -15,8 +15,19 @@ Created 4/24/1996 Heikki Tuuri #include "ut0byte.h" /************************************************************************ +Finds the first table name in the given database. */ + +char* +dict_get_first_table_name_in_db( +/*============================*/ + /* out, own: table name, NULL if does not exist; + the caller must free the memory in the string! */ + char* name); /* in: database name which ends to '/' */ +/************************************************************************ Loads a table definition and also all its index definitions, and also -the cluster definition, if the table is a member in a cluster. */ +the cluster definition if the table is a member in a cluster. Also loads +all foreign key constraints where the foreign key is in the table or where +a foreign key references columns in this table. */ dict_table_t* dict_load_table( @@ -40,6 +51,25 @@ void dict_load_sys_table( /*================*/ dict_table_t* table); /* in: system table */ +/*************************************************************************** +Loads foreign key constraints where the table is either the foreign key +holder or where the table is referenced by a foreign key. Adds these +constraints to the data dictionary. 
Note that we know that the dictionary +cache already contains all constraints where the other relevant table is +already in the dictionary cache. */ + +ulint +dict_load_foreigns( +/*===============*/ + /* out: DB_SUCCESS or error code */ + char* table_name); /* in: table name */ +/************************************************************************ +Prints to the standard output information on all tables found in the data +dictionary system table. */ + +void +dict_print(void); +/*============*/ #ifndef UNIV_NONINL diff --git a/innobase/include/dict0mem.h b/innobase/include/dict0mem.h index 74ecbc8bba2..bd24d1539ca 100644 --- a/innobase/include/dict0mem.h +++ b/innobase/include/dict0mem.h @@ -123,6 +123,13 @@ dict_mem_index_free( /*================*/ dict_index_t* index); /* in: index */ /************************************************************************** +Creates and initializes a foreign constraint memory object. */ + +dict_foreign_t* +dict_mem_foreign_create(void); +/*=========================*/ + /* out, own: foreign constraint struct */ +/************************************************************************** Creates a procedure memory object. 
*/ dict_proc_t* @@ -221,15 +228,56 @@ struct dict_index_struct{ dictionary cache */ btr_search_t* search_info; /* info used in optimistic searches */ /*----------------------*/ - ulint stat_n_diff_key_vals; + ib_longlong* stat_n_diff_key_vals; /* approximate number of different key values - for this index; we periodically calculate - new estimates */ + for this index, for each n-column prefix + where n <= dict_get_n_unique(index); we + periodically calculate new estimates */ ulint stat_index_size; /* approximate index size in database pages */ + ulint stat_n_leaf_pages; + /* approximate number of leaf pages in the + index tree */ ulint magic_n;/* magic number */ }; +/* Data structure for a foreign key constraint; an example: +FOREIGN KEY (A, B) REFERENCES TABLE2 (C, D) */ + +struct dict_foreign_struct{ + mem_heap_t* heap; /* this object is allocated from + this memory heap */ + char* id; /* id of the constraint as a + null-terminated string */ + char* foreign_table_name;/* foreign table name */ + dict_table_t* foreign_table; /* table where the foreign key is */ + char** foreign_col_names;/* names of the columns in the + foreign key */ + char* referenced_table_name;/* referenced table name */ + dict_table_t* referenced_table;/* table where the referenced key + is */ + char** referenced_col_names;/* names of the referenced + columns in the referenced table */ + ulint n_fields; /* number of indexes' first fields + for which the the foreign key + constraint is defined: we allow the + indexes to contain more fields than + mentioned in the constraint, as long + as the first fields are as mentioned */ + dict_index_t* foreign_index; /* foreign index; we require that + both tables contain explicitly defined + indexes for the constraint: InnoDB + does not generate new indexes + implicitly */ + dict_index_t* referenced_index;/* referenced index */ + UT_LIST_NODE_T(dict_foreign_t) + foreign_list; /* list node for foreign keys of the + table */ + UT_LIST_NODE_T(dict_foreign_t) 
+ referenced_list;/* list node for referenced keys of the + table */ +}; + #define DICT_INDEX_MAGIC_N 76789786 /* Data structure for a database table */ @@ -247,6 +295,13 @@ struct dict_table_struct{ dict_col_t* cols; /* array of column descriptions */ UT_LIST_BASE_NODE_T(dict_index_t) indexes; /* list of indexes of the table */ + UT_LIST_BASE_NODE_T(dict_foreign_t) + foreign_list;/* list of foreign key constraints + in the table; these refer to columns + in other tables */ + UT_LIST_BASE_NODE_T(dict_foreign_t) + referenced_list;/* list of foreign key constraints + which refer to this table */ UT_LIST_NODE_T(dict_table_t) table_LRU; /* node of the LRU list of tables */ ulint mem_fix;/* count of how many times the table @@ -254,6 +309,13 @@ struct dict_table_struct{ currently NOT used */ ibool cached; /* TRUE if the table object has been added to the dictionary cache */ + lock_t* auto_inc_lock;/* a buffer for an auto-inc lock + for this table: we allocate the memory here + so that individual transactions can get it + and release it without a need to allocate + space from the lock heap of the trx: + otherwise the lock heap would grow rapidly + if we do a large insert from a select */ UT_LIST_BASE_NODE_T(lock_t) locks; /* list of locks on the table */ /*----------------------*/ @@ -278,7 +340,7 @@ struct dict_table_struct{ forget about value TRUE if it has to reload the table definition from disk */ /*----------------------*/ - ulint stat_n_rows; + ib_longlong stat_n_rows; /* approximate number of rows in the table; we periodically calculate new estimates */ ulint stat_clustered_index_size; diff --git a/innobase/include/dict0types.h b/innobase/include/dict0types.h index fe1bad45063..498c6f46b7b 100644 --- a/innobase/include/dict0types.h +++ b/innobase/include/dict0types.h @@ -16,6 +16,7 @@ typedef struct dict_index_struct dict_index_t; typedef struct dict_tree_struct dict_tree_t; typedef struct dict_table_struct dict_table_t; typedef struct dict_proc_struct 
dict_proc_t; +typedef struct dict_foreign_struct dict_foreign_t; /* A cluster object is a table object with the type field set to DICT_CLUSTERED */ diff --git a/innobase/include/fil0fil.h b/innobase/include/fil0fil.h index bfc322270fc..ca74ea4cb2c 100644 --- a/innobase/include/fil0fil.h +++ b/innobase/include/fil0fil.h @@ -76,6 +76,9 @@ extern fil_addr_t fil_addr_null; #define FIL_TABLESPACE 501 #define FIL_LOG 502 +extern ulint fil_n_pending_log_flushes; +extern ulint fil_n_pending_tablespace_flushes; + /*********************************************************************** Reserves a right to open a single file. The right must be released with fil_release_right_to_open. */ diff --git a/innobase/include/ibuf0ibuf.h b/innobase/include/ibuf0ibuf.h index f0b333192de..99fb1595f49 100644 --- a/innobase/include/ibuf0ibuf.h +++ b/innobase/include/ibuf0ibuf.h @@ -226,6 +226,21 @@ ibuf_contract( issued read with the highest tablespace address to complete */ /************************************************************************* +Contracts insert buffer trees by reading pages to the buffer pool. */ + +ulint +ibuf_contract_for_n_pages( +/*======================*/ + /* out: a lower limit for the combined size in bytes + of entries which will be merged from ibuf trees to the + pages read, 0 if ibuf is empty */ + ibool sync, /* in: TRUE if the caller wants to wait for the + issued read with the highest tablespace address + to complete */ + ulint n_pages);/* in: try to read at least this many pages to + the buffer pool and merge the ibuf contents to + them */ +/************************************************************************* Parses a redo log record of an ibuf bitmap page init. 
*/ byte* diff --git a/innobase/include/lock0lock.h b/innobase/include/lock0lock.h index c492e7b8ef3..5a15b78b869 100644 --- a/innobase/include/lock0lock.h +++ b/innobase/include/lock0lock.h @@ -21,15 +21,13 @@ Created 5/7/1996 Heikki Tuuri extern ibool lock_print_waits; -/***************************************************************** -Cancels a waiting record lock request and releases the waiting transaction -that requested it. NOTE: does NOT check if waiting lock requests behind this -one can now be granted! */ +/************************************************************************* +Gets the size of a lock struct. */ -void -lock_rec_cancel( -/*============*/ - lock_t* lock); /* in: waiting record lock request */ +ulint +lock_get_size(void); +/*===============*/ + /* out: size in bytes */ /************************************************************************* Creates the lock system at database start. */ @@ -388,6 +386,14 @@ lock_is_on_table( /* out: TRUE if there are lock(s) */ dict_table_t* table); /* in: database table in dictionary cache */ /************************************************************************* +Releases an auto-inc lock a transaction possibly has on a table. +Releases possible other transactions waiting for this lock. */ + +void +lock_table_unlock_auto_inc( +/*=======================*/ + trx_t* trx); /* in: transaction */ +/************************************************************************* Releases transaction locks, and releases possible other transactions waiting because of these locks. */ @@ -396,6 +402,14 @@ lock_release_off_kernel( /*====================*/ trx_t* trx); /* in: transaction */ /************************************************************************* +Cancels a waiting lock request and releases possible other transactions +waiting behind it. 
*/ + +void +lock_cancel_waiting_and_release( +/*============================*/ + lock_t* lock); /* in: waiting lock request */ +/************************************************************************* Resets all locks, both table and record locks, on a table to be dropped. No lock is allowed to be a wait lock. */ @@ -495,6 +509,8 @@ extern lock_sys_t* lock_sys; #define LOCK_IX 3 /* intention exclusive */ #define LOCK_S 4 /* shared */ #define LOCK_X 5 /* exclusive */ +#define LOCK_AUTO_INC 6 /* locks the auto-inc counter of a table + in an exclusive mode */ #define LOCK_MODE_MASK 0xF /* mask used to extract mode from the type_mode field in a lock */ #define LOCK_TABLE 16 /* these type values should be so high that */ diff --git a/innobase/include/log0log.h b/innobase/include/log0log.h index 001f98cfc3c..adff9fae544 100644 --- a/innobase/include/log0log.h +++ b/innobase/include/log0log.h @@ -659,6 +659,11 @@ struct log_struct{ mutex! */ ulint n_log_ios; /* number of log i/os initiated thus far */ + ulint n_log_ios_old; /* number of log i/o's at the + previous printout */ + time_t last_printout_time;/* when log_print was last time + called */ + /* Fields involved in checkpoints */ ulint max_modified_age_async; /* when this recommended value for lsn diff --git a/innobase/include/mtr0mtr.h b/innobase/include/mtr0mtr.h index dec8eeb1e15..0ef25b3d1ee 100644 --- a/innobase/include/mtr0mtr.h +++ b/innobase/include/mtr0mtr.h @@ -203,20 +203,12 @@ mtr_read_dulint( mtr_t* mtr); /* in: mini-transaction handle */ /************************************************************************* This macro locks an rw-lock in s-mode. */ -#ifdef UNIV_SYNC_DEBUG #define mtr_s_lock(B, MTR) mtr_s_lock_func((B), IB__FILE__, __LINE__,\ (MTR)) -#else -#define mtr_s_lock(B, MTR) mtr_s_lock_func((B), (MTR)) -#endif /************************************************************************* This macro locks an rw-lock in x-mode. 
*/ -#ifdef UNIV_SYNC_DEBUG #define mtr_x_lock(B, MTR) mtr_x_lock_func((B), IB__FILE__, __LINE__,\ (MTR)) -#else -#define mtr_x_lock(B, MTR) mtr_x_lock_func((B), (MTR)) -#endif /************************************************************************* NOTE! Use the macro above! Locks a lock in s-mode. */ @@ -225,10 +217,8 @@ void mtr_s_lock_func( /*============*/ rw_lock_t* lock, /* in: rw-lock */ -#ifdef UNIV_SYNC_DEBUG char* file, /* in: file name */ ulint line, /* in: line number */ -#endif mtr_t* mtr); /* in: mtr */ /************************************************************************* NOTE! Use the macro above! @@ -238,10 +228,8 @@ void mtr_x_lock_func( /*============*/ rw_lock_t* lock, /* in: rw-lock */ -#ifdef UNIV_SYNC_DEBUG char* file, /* in: file name */ ulint line, /* in: line number */ -#endif mtr_t* mtr); /* in: mtr */ /******************************************************* diff --git a/innobase/include/mtr0mtr.ic b/innobase/include/mtr0mtr.ic index 5718d872bcb..51112fc0d14 100644 --- a/innobase/include/mtr0mtr.ic +++ b/innobase/include/mtr0mtr.ic @@ -217,20 +217,14 @@ void mtr_s_lock_func( /*============*/ rw_lock_t* lock, /* in: rw-lock */ -#ifdef UNIV_SYNC_DEBUG char* file, /* in: file name */ ulint line, /* in: line number */ -#endif mtr_t* mtr) /* in: mtr */ { ut_ad(mtr); ut_ad(lock); - rw_lock_s_lock_func(lock - #ifdef UNIV_SYNC_DEBUG - ,0, file, line - #endif - ); + rw_lock_s_lock_func(lock, 0, file, line); mtr_memo_push(mtr, lock, MTR_MEMO_S_LOCK); } @@ -242,20 +236,14 @@ void mtr_x_lock_func( /*============*/ rw_lock_t* lock, /* in: rw-lock */ -#ifdef UNIV_SYNC_DEBUG char* file, /* in: file name */ ulint line, /* in: line number */ -#endif mtr_t* mtr) /* in: mtr */ { ut_ad(mtr); ut_ad(lock); - rw_lock_x_lock_func(lock, 0 - #ifdef UNIV_SYNC_DEBUG - , file, line - #endif - ); + rw_lock_x_lock_func(lock, 0, file, line); mtr_memo_push(mtr, lock, MTR_MEMO_X_LOCK); } diff --git a/innobase/include/rem0cmp.h b/innobase/include/rem0cmp.h index 
10c428cb9ca..6f2a99fc8c2 100644 --- a/innobase/include/rem0cmp.h +++ b/innobase/include/rem0cmp.h @@ -16,6 +16,32 @@ Created 7/1/1994 Heikki Tuuri #include "rem0rec.h" /***************************************************************** +Returns TRUE if two types are equal for comparison purposes. */ + +ibool +cmp_types_are_equal( +/*================*/ + /* out: TRUE if the types are considered + equal in comparisons */ + dtype_t* type1, /* in: type 1 */ + dtype_t* type2); /* in: type 2 */ +/***************************************************************** +This function is used to compare two data fields for which we know the +data type. */ +UNIV_INLINE +int +cmp_data_data( +/*==========*/ + /* out: 1, 0, -1, if data1 is greater, equal, + less than data2, respectively */ + dtype_t* cur_type,/* in: data type of the fields */ + byte* data1, /* in: data field (== a pointer to a memory + buffer) */ + ulint len1, /* in: data field length or UNIV_SQL_NULL */ + byte* data2, /* in: data field (== a pointer to a memory + buffer) */ + ulint len2); /* in: data field length or UNIV_SQL_NULL */ +/***************************************************************** This function is used to compare two dfields where at least the first has its data type field set. */ UNIV_INLINE diff --git a/innobase/include/row0ins.h b/innobase/include/row0ins.h index 612b9e8d73a..cc3b9fa7e9a 100644 --- a/innobase/include/row0ins.h +++ b/innobase/include/row0ins.h @@ -16,6 +16,28 @@ Created 4/20/1996 Heikki Tuuri #include "trx0types.h" #include "row0types.h" +/******************************************************************* +Checks if foreign key constraint fails for an index entry. Sets shared locks +which lock either the success or the failure of the constraint. NOTE that +the caller must have a shared latch on dict_foreign_key_check_lock. 
*/ + +ulint +row_ins_check_foreign_constraint( +/*=============================*/ + /* out: DB_SUCCESS, DB_LOCK_WAIT, + DB_NO_REFERENCED_ROW, + or DB_ROW_IS_REFERENCED */ + ibool check_ref,/* in: TRUE If we want to check that + the referenced table is ok, FALSE if we + want to to check the foreign key table */ + dict_foreign_t* foreign,/* in: foreign constraint; NOTE that the + tables mentioned in it must be in the + dictionary cache if they exist at all */ + dict_table_t* table, /* in: if check_ref is TRUE, then the foreign + table, else the referenced table */ + dict_index_t* index, /* in: index in table */ + dtuple_t* entry, /* in: index entry for index */ + que_thr_t* thr); /* in: query thread */ /************************************************************************* Creates an insert node struct. */ diff --git a/innobase/include/row0mysql.h b/innobase/include/row0mysql.h index 31f9e15cddc..4e90c0ac590 100644 --- a/innobase/include/row0mysql.h +++ b/innobase/include/row0mysql.h @@ -133,6 +133,26 @@ row_update_prebuilt_trx( handle */ trx_t* trx); /* in: transaction handle */ /************************************************************************* +Unlocks an AUTO_INC type lock possibly reserved by trx. */ + +void +row_unlock_table_autoinc_for_mysql( +/*===============================*/ + trx_t* trx); /* in: transaction */ +/************************************************************************* +Sets an AUTO_INC type lock on the table mentioned in prebuilt. The +AUTO_INC lock gives exclusive access to the auto-inc counter of the +table. The lock is reserved only for the duration of an SQL statement. +It is not compatible with another AUTO_INC or exclusive lock on the +table. 
*/ + +int +row_lock_table_autoinc_for_mysql( +/*=============================*/ + /* out: error code or DB_SUCCESS */ + row_prebuilt_t* prebuilt); /* in: prebuilt struct in the MySQL + table handle */ +/************************************************************************* Does an insert for MySQL. */ int @@ -211,6 +231,26 @@ row_create_index_for_mysql( dict_index_t* index, /* in: index defintion */ trx_t* trx); /* in: transaction handle */ /************************************************************************* +Scans a table create SQL string and adds to the data dictionary +the foreign key constraints declared in the string. This function +should be called after the indexes for a table have been created. +Each foreign key constraint must be accompanied with indexes in +bot participating tables. The indexes are allowed to contain more +fields than mentioned in the constraint. */ + +int +row_table_add_foreign_constraints( +/*==============================*/ + /* out: error code or DB_SUCCESS */ + trx_t* trx, /* in: transaction */ + char* sql_string, /* in: table create statement where + foreign keys are declared like: + FOREIGN KEY (a, b) REFERENCES table2(c, d), + table2 can be written also with the database + name before it: test.table2 */ + char* name); /* in: table full name in the normalized form + database_name/table_name */ +/************************************************************************* Drops a table for MySQL. If the name of the dropped table ends to characters INNODB_MONITOR, then this also stops printing of monitor output by the master thread. */ @@ -224,6 +264,15 @@ row_drop_table_for_mysql( ibool has_dict_mutex);/* in: TRUE if the caller already owns the dictionary system mutex */ /************************************************************************* +Drops a database for MySQL. 
*/ + +int +row_drop_database_for_mysql( +/*========================*/ + /* out: error code or DB_SUCCESS */ + char* name, /* in: database name which ends to '/' */ + trx_t* trx); /* in: transaction handle */ +/************************************************************************* Renames a table for MySQL. */ int diff --git a/innobase/include/row0upd.h b/innobase/include/row0upd.h index 9bb73726b29..106d3866b25 100644 --- a/innobase/include/row0upd.h +++ b/innobase/include/row0upd.h @@ -47,8 +47,7 @@ upd_get_nth_field( upd_t* update, /* in: update vector */ ulint n); /* in: field position in update vector */ /************************************************************************* -Sets the clustered index field number to be updated by an update vector -field. */ +Sets an index field number to be updated by an update vector field. */ UNIV_INLINE void upd_field_set_field_no( @@ -56,7 +55,7 @@ upd_field_set_field_no( upd_field_t* upd_field, /* in: update vector field */ ulint field_no, /* in: field number in a clustered index */ - dict_index_t* index); /* in: clustered index */ + dict_index_t* index); /* in: index */ /************************************************************************* Writes into the redo log the values of trx id and roll ptr and enough info to determine their positions within a clustered index record. */ @@ -136,13 +135,27 @@ row_upd_rec_in_place( rec_t* rec, /* in/out: record where replaced */ upd_t* update);/* in: update vector */ /******************************************************************* +Builds an update vector from those fields which in a secondary index entry +differ from a record that has the equal ordering fields. NOTE: we compare +the fields as binary strings! 
*/ + +upd_t* +row_upd_build_sec_rec_difference_binary( +/*====================================*/ + /* out, own: update vector of differing + fields */ + dict_index_t* index, /* in: index */ + dtuple_t* entry, /* in: entry to insert */ + rec_t* rec, /* in: secondary index record */ + mem_heap_t* heap); /* in: memory heap from which allocated */ +/******************************************************************* Builds an update vector from those fields, excluding the roll ptr and trx id fields, which in an index entry differ from a record that has -the equal ordering fields. */ +the equal ordering fields. NOTE: we compare the fields as binary strings! */ upd_t* -row_upd_build_difference( -/*=====================*/ +row_upd_build_difference_binary( +/*============================*/ /* out, own: update vector of differing fields, excluding roll ptr and trx id */ dict_index_t* index, /* in: clustered index */ @@ -175,13 +188,16 @@ row_upd_clust_index_replace_new_col_vals( /*************************************************************** Checks if an update vector changes an ordering field of an index record. This function is fast if the update vector is short or the number of ordering -fields in the index is small. Otherwise, this can be quadratic. */ - +fields in the index is small. Otherwise, this can be quadratic. +NOTE: we compare the fields as binary strings! 
*/ + ibool -row_upd_changes_ord_field( -/*======================*/ +row_upd_changes_ord_field_binary( +/*=============================*/ /* out: TRUE if update vector changes - an ordering field in the index record */ + an ordering field in the index record; + NOTE: the fields are compared as binary + strings */ dtuple_t* row, /* in: old value of row, or NULL if the row and the data values in update are not known when this function is called, e.g., at @@ -191,11 +207,12 @@ row_upd_changes_ord_field( /*************************************************************** Checks if an update vector changes an ordering field of an index record. This function is fast if the update vector is short or the number of ordering -fields in the index is small. Otherwise, this can be quadratic. */ +fields in the index is small. Otherwise, this can be quadratic. +NOTE: we compare the fields as binary strings! */ ibool -row_upd_changes_some_index_ord_field( -/*=================================*/ +row_upd_changes_some_index_ord_field_binary( +/*========================================*/ /* out: TRUE if update vector may change an ordering field in an index record */ dict_table_t* table, /* in: table */ diff --git a/innobase/include/row0upd.ic b/innobase/include/row0upd.ic index b785e52caa0..7015b2eda13 100644 --- a/innobase/include/row0upd.ic +++ b/innobase/include/row0upd.ic @@ -70,8 +70,7 @@ upd_get_nth_field( } /************************************************************************* -Sets the clustered index field number to be updated by an update vector -field. */ +Sets an index field number to be updated by an update vector field. 
*/ UNIV_INLINE void upd_field_set_field_no( @@ -79,12 +78,18 @@ upd_field_set_field_no( upd_field_t* upd_field, /* in: update vector field */ ulint field_no, /* in: field number in a clustered index */ - dict_index_t* index) /* in: clustered index */ -{ - ut_ad(index->type & DICT_CLUSTERED); - + dict_index_t* index) /* in: index */ +{ upd_field->field_no = field_no; + if (field_no >= dict_index_get_n_fields(index)) { + fprintf(stderr, + "InnoDB: Error: trying to access field %lu in table %s\n" + "InnoDB: index %s, but index has only %lu fields\n", + field_no, index->table_name, index->name, + dict_index_get_n_fields(index)); + } + dtype_copy(dfield_get_type(&(upd_field->new_val)), dict_index_get_nth_type(index, field_no)); } diff --git a/innobase/include/srv0srv.h b/innobase/include/srv0srv.h index 3f014adb76c..49eb2b6ec35 100644 --- a/innobase/include/srv0srv.h +++ b/innobase/include/srv0srv.h @@ -16,6 +16,11 @@ Created 10/10/1995 Heikki Tuuri #include "com0com.h" #include "que0types.h" + +/* When this event is set the lock timeout and InnoDB monitor +thread starts running */ +extern os_event_t srv_lock_timeout_thread_event; + /* Server parameters which are read from the initfile */ extern char* srv_data_home; @@ -27,6 +32,8 @@ extern char** srv_data_file_names; extern ulint* srv_data_file_sizes; extern ulint* srv_data_file_is_raw_partition; +extern ibool srv_created_new_raw; + #define SRV_NEW_RAW 1 #define SRV_OLD_RAW 2 @@ -39,6 +46,8 @@ extern ibool srv_log_archive_on; extern ulint srv_log_buffer_size; extern ibool srv_flush_log_at_trx_commit; +extern byte srv_latin1_ordering[256];/* The sort order table of the latin1 + character set */ extern ibool srv_use_native_aio; extern ulint srv_pool_size; @@ -54,6 +63,7 @@ extern ulint srv_lock_wait_timeout; extern char* srv_unix_file_flush_method_str; extern ulint srv_unix_file_flush_method; +extern ulint srv_force_recovery; extern ibool srv_use_doublewrite_buf; @@ -70,6 +80,7 @@ extern ulint srv_n_rows_read; extern 
ibool srv_print_innodb_monitor; extern ibool srv_print_innodb_lock_monitor; extern ibool srv_print_innodb_tablespace_monitor; +extern ibool srv_print_innodb_table_monitor; extern ulint srv_n_spin_wait_rounds; extern ulint srv_spin_wait_delay; @@ -132,6 +143,25 @@ what these mean */ #define SRV_UNIX_LITTLESYNC 3 #define SRV_UNIX_NOSYNC 4 +/* Alternatives for srv_force_recovery. Non-zero values are intended +to help the user get a damaged database up so that he can dump intact +tables and rows with SELECT INTO OUTFILE. The database must not otherwise +be used with these options! A bigger number below means that all precautions +of lower numbers are included. */ + +#define SRV_FORCE_IGNORE_CORRUPT 1 /* let the server run even if it + detects a corrupt page */ +#define SRV_FORCE_NO_BACKGROUND 2 /* prevent the main thread from + running: if a crash would occur + in purge, this prevents it */ +#define SRV_FORCE_NO_TRX_UNDO 3 /* do not run trx rollback after + recovery */ +#define SRV_FORCE_NO_IBUF_MERGE 4 /* prevent also ibuf operations: + if they would cause a crash, better + not do them */ +#define SRV_FORCE_NO_LOG_REDO 5 /* do not do the log roll-forward + in connection with recovery */ + /************************************************************************* Boots Innobase server. */ @@ -224,15 +254,30 @@ srv_release_mysql_thread_if_suspended( que_thr_t* thr); /* in: query thread associated with the MySQL OS thread */ /************************************************************************* -A thread which wakes up threads whose lock wait may have lasted too long. */ +A thread which wakes up threads whose lock wait may have lasted too long. +This also prints the info output by various InnoDB monitors. 
*/ + +#ifndef __WIN__ +void* +#else +ulint +#endif +srv_lock_timeout_and_monitor_thread( +/*================================*/ + /* out: a dummy parameter */ + void* arg); /* in: a dummy parameter required by + os_thread_create */ +/************************************************************************* +A thread which prints warnings about semaphore waits which have lasted +too long. These can be used to track bugs which cause hangs. */ #ifndef __WIN__ void* #else ulint #endif -srv_lock_timeout_monitor_thread( -/*============================*/ +srv_error_monitor_thread( +/*=====================*/ /* out: a dummy parameter */ void* arg); /* in: a dummy parameter required by os_thread_create */ diff --git a/innobase/include/sync0arr.h b/innobase/include/sync0arr.h index 75d79f4c93f..f0134894997 100644 --- a/innobase/include/sync0arr.h +++ b/innobase/include/sync0arr.h @@ -51,13 +51,9 @@ sync_array_reserve_cell( sync_array_t* arr, /* in: wait array */ void* object, /* in: pointer to the object to wait for */ ulint type, /* in: lock request type */ - #ifdef UNIV_SYNC_DEBUG - char* file, /* in: in debug version file where - requested */ - ulint line, /* in: in the debug version line where - requested */ - #endif - ulint* index); /* out: index of the reserved cell */ + char* file, /* in: file where requested */ + ulint line, /* in: line where requested */ + ulint* index); /* out: index of the reserved cell */ /********************************************************************** This function should be called when a thread starts to wait on a wait array cell. In the debug version this function checks @@ -90,6 +86,20 @@ sync_array_signal_object( /*=====================*/ sync_array_t* arr, /* in: wait array */ void* object);/* in: wait object */ +/************************************************************************** +If the wakeup algorithm does not work perfectly at semaphore relases, +this function will do the waking (see the comment in mutex_exit). 
This +function should be called about every 1 second in the server. */ + +void +sync_arr_wake_threads_if_sema_free(void); +/*====================================*/ +/************************************************************************** +Prints warnings of long semaphore waits to stderr. Currently > 120 sec. */ + +void +sync_array_print_long_waits(void); +/*=============================*/ /************************************************************************ Validates the integrity of the wait array. Checks that the number of reserved cells equals the count variable. */ diff --git a/innobase/include/sync0ipm.ic b/innobase/include/sync0ipm.ic index 8487830e1dd..b8aa87ba6d6 100644 --- a/innobase/include/sync0ipm.ic +++ b/innobase/include/sync0ipm.ic @@ -92,7 +92,7 @@ loop: loop_count++; ut_ad(loop_count < 15); - if (mutex_enter_nowait(mutex) == 0) { + if (mutex_enter_nowait(mutex, IB__FILE__, __LINE__) == 0) { /* Succeeded! */ return(0); @@ -105,7 +105,7 @@ loop: /* Order is important here: FIRST reset event, then set waiters */ ip_mutex_set_waiters(ip_mutex, 1); - if (mutex_enter_nowait(mutex) == 0) { + if (mutex_enter_nowait(mutex, IB__FILE__, __LINE__) == 0) { /* Succeeded! */ return(0); diff --git a/innobase/include/sync0rw.h b/innobase/include/sync0rw.h index 4e055da3810..7ad38f5bc7f 100644 --- a/innobase/include/sync0rw.h +++ b/innobase/include/sync0rw.h @@ -46,9 +46,10 @@ extern ibool rw_lock_debug_waiters; /* This is set to TRUE, if extern ulint rw_s_system_call_count; extern ulint rw_s_spin_wait_count; extern ulint rw_s_exit_count; - +extern ulint rw_s_os_wait_count; extern ulint rw_x_system_call_count; extern ulint rw_x_spin_wait_count; +extern ulint rw_x_os_wait_count; extern ulint rw_x_exit_count; /********************************************************************** @@ -92,32 +93,20 @@ rw_lock_validate( NOTE! The following macros should be used in rw s-locking, not the corresponding function. 
*/ -#ifdef UNIV_SYNC_DEBUG #define rw_lock_s_lock(M) rw_lock_s_lock_func(\ (M), 0, IB__FILE__, __LINE__) -#else -#define rw_lock_s_lock(M) rw_lock_s_lock_func(M) -#endif /****************************************************************** NOTE! The following macros should be used in rw s-locking, not the corresponding function. */ -#ifdef UNIV_SYNC_DEBUG #define rw_lock_s_lock_gen(M, P) rw_lock_s_lock_func(\ (M), (P), IB__FILE__, __LINE__) -#else -#define rw_lock_s_lock_gen(M, P) rw_lock_s_lock_func(M) -#endif /****************************************************************** NOTE! The following macros should be used in rw s-locking, not the corresponding function. */ -#ifdef UNIV_SYNC_DEBUG #define rw_lock_s_lock_nowait(M) rw_lock_s_lock_func_nowait(\ (M), IB__FILE__, __LINE__) -#else -#define rw_lock_s_lock_nowait(M) rw_lock_s_lock_func_nowait(M) -#endif /********************************************************************** NOTE! Use the corresponding macro, not directly this function, except if you supply the file name and line number. Lock an rw-lock in shared mode @@ -129,14 +118,11 @@ UNIV_INLINE void rw_lock_s_lock_func( /*================*/ - rw_lock_t* lock /* in: pointer to rw-lock */ - #ifdef UNIV_SYNC_DEBUG - ,ulint pass, /* in: pass value; != 0, if the lock will + rw_lock_t* lock, /* in: pointer to rw-lock */ + ulint pass, /* in: pass value; != 0, if the lock will be passed to another thread to unlock */ - char* file_name, /* in: file name where lock requested */ - ulint line /* in: line where requested */ - #endif -); + char* file_name,/* in: file name where lock requested */ + ulint line); /* in: line where requested */ /********************************************************************** NOTE! Use the corresponding macro, not directly this function, except if you supply the file name and line number. 
Lock an rw-lock in shared mode @@ -146,12 +132,9 @@ ibool rw_lock_s_lock_func_nowait( /*=======================*/ /* out: TRUE if success */ - rw_lock_t* lock /* in: pointer to rw-lock */ - #ifdef UNIV_SYNC_DEBUG - ,char* file_name, /* in: file name where lock requested */ - ulint line /* in: line where requested */ - #endif -); + rw_lock_t* lock, /* in: pointer to rw-lock */ + char* file_name,/* in: file name where lock requested */ + ulint line); /* in: line where requested */ /********************************************************************** NOTE! Use the corresponding macro, not directly this function! Lock an rw-lock in exclusive mode for the current thread if the lock can be @@ -161,12 +144,9 @@ ibool rw_lock_x_lock_func_nowait( /*=======================*/ /* out: TRUE if success */ - rw_lock_t* lock /* in: pointer to rw-lock */ - #ifdef UNIV_SYNC_DEBUG - ,char* file_name, /* in: file name where lock requested */ - ulint line /* in: line where requested */ - #endif -); + rw_lock_t* lock, /* in: pointer to rw-lock */ + char* file_name,/* in: file name where lock requested */ + ulint line); /* in: line where requested */ /********************************************************************** Releases a shared mode lock. */ UNIV_INLINE @@ -199,32 +179,20 @@ Releases a shared mode lock. */ NOTE! The following macro should be used in rw x-locking, not the corresponding function. */ -#ifdef UNIV_SYNC_DEBUG #define rw_lock_x_lock(M) rw_lock_x_lock_func(\ (M), 0, IB__FILE__, __LINE__) -#else -#define rw_lock_x_lock(M) rw_lock_x_lock_func(M, 0) -#endif /****************************************************************** NOTE! The following macro should be used in rw x-locking, not the corresponding function. 
*/ -#ifdef UNIV_SYNC_DEBUG #define rw_lock_x_lock_gen(M, P) rw_lock_x_lock_func(\ (M), (P), IB__FILE__, __LINE__) -#else -#define rw_lock_x_lock_gen(M, P) rw_lock_x_lock_func(M, P) -#endif /****************************************************************** NOTE! The following macros should be used in rw x-locking, not the corresponding function. */ -#ifdef UNIV_SYNC_DEBUG #define rw_lock_x_lock_nowait(M) rw_lock_x_lock_func_nowait(\ (M), IB__FILE__, __LINE__) -#else -#define rw_lock_x_lock_nowait(M) rw_lock_x_lock_func_nowait(M) -#endif /********************************************************************** NOTE! Use the corresponding macro, not directly this function! Lock an rw-lock in exclusive mode for the current thread. If the rw-lock is locked @@ -239,13 +207,10 @@ void rw_lock_x_lock_func( /*================*/ rw_lock_t* lock, /* in: pointer to rw-lock */ - ulint pass /* in: pass value; != 0, if the lock will + ulint pass, /* in: pass value; != 0, if the lock will be passed to another thread to unlock */ - #ifdef UNIV_SYNC_DEBUG - ,char* file_name, /* in: file name where lock requested */ - ulint line /* in: line where requested */ - #endif -); + char* file_name,/* in: file name where lock requested */ + ulint line); /* in: line where requested */ /********************************************************************** Releases an exclusive mode lock. 
*/ UNIV_INLINE @@ -283,10 +248,8 @@ void rw_lock_s_lock_direct( /*==================*/ rw_lock_t* lock /* in: pointer to rw-lock */ - #ifdef UNIV_SYNC_DEBUG ,char* file_name, /* in: file name where lock requested */ ulint line /* in: line where requested */ - #endif ); /********************************************************************** Low-level function which locks an rw-lock in x-mode when we know that it @@ -297,10 +260,8 @@ void rw_lock_x_lock_direct( /*==================*/ rw_lock_t* lock /* in: pointer to rw-lock */ - #ifdef UNIV_SYNC_DEBUG ,char* file_name, /* in: file name where lock requested */ ulint line /* in: line where requested */ - #endif ); /********************************************************************** This function is used in the insert buffer to move the ownership of an @@ -349,6 +310,23 @@ rw_lock_get_x_lock_count( /*=====================*/ /* out: value of writer_count */ rw_lock_t* lock); /* in: rw-lock */ +/************************************************************************ +Accessor functions for rw lock. */ +UNIV_INLINE +ulint +rw_lock_get_waiters( +/*================*/ + rw_lock_t* lock); +UNIV_INLINE +ulint +rw_lock_get_writer( +/*===============*/ + rw_lock_t* lock); +UNIV_INLINE +ulint +rw_lock_get_reader_count( +/*=====================*/ + rw_lock_t* lock); /********************************************************************** Checks if the thread has locked the rw-lock in the specified mode, with the pass value == 0. */ @@ -414,9 +392,6 @@ rw_lock_debug_print( /*================*/ rw_lock_debug_t* info); /* in: debug struct */ - -#define RW_CNAME_LEN 8 - /* NOTE! The structure appears here only for the compiler to know its size. Do not use its fields directly! The structure used in the spin lock implementation of a read-write lock. 
Several threads may have a shared lock @@ -447,7 +422,7 @@ struct rw_lock_struct { ulint waiters; /* This ulint is set to 1 if there are waiters (readers or writers) in the global wait array, waiting for this rw_lock. - Otherwise, = 0. */ + Otherwise, == 0. */ ibool writer_is_wait_ex; /* This is TRUE if the writer field is RW_LOCK_WAIT_EX; this field is located far @@ -463,9 +438,12 @@ struct rw_lock_struct { info list of the lock */ ulint level; /* Debug version: level in the global latching order; default SYNC_LEVEL_NONE */ - char cfile_name[RW_CNAME_LEN]; - /* File name where lock created */ + char* cfile_name; /* File name where lock created */ ulint cline; /* Line where created */ + char* last_s_file_name;/* File name where last time s-locked */ + char* last_x_file_name;/* File name where last time x-locked */ + ulint last_s_line; /* Line number where last time s-locked */ + ulint last_x_line; /* Line number where last time x-locked */ ulint magic_n; }; diff --git a/innobase/include/sync0rw.ic b/innobase/include/sync0rw.ic index 11add13d2d0..2a02cfb6a53 100644 --- a/innobase/include/sync0rw.ic +++ b/innobase/include/sync0rw.ic @@ -15,14 +15,11 @@ waiting for the lock before suspending the thread. */ void rw_lock_s_lock_spin( /*================*/ - rw_lock_t* lock /* in: pointer to rw-lock */ - #ifdef UNIV_SYNC_DEBUG - ,ulint pass, /* in: pass value; != 0, if the lock will + rw_lock_t* lock, /* in: pointer to rw-lock */ + ulint pass, /* in: pass value; != 0, if the lock will be passed to another thread to unlock */ - char* file_name, /* in: file name where lock requested */ - ulint line /* in: line where requested */ - #endif -); + char* file_name,/* in: file name where lock requested */ + ulint line); /* in: line where requested */ /********************************************************************** Inserts the debug information for an rw-lock. 
*/ @@ -128,14 +125,11 @@ ibool rw_lock_s_lock_low( /*===============*/ /* out: TRUE if success */ - rw_lock_t* lock /* in: pointer to rw-lock */ - #ifdef UNIV_SYNC_DEBUG - ,ulint pass, /* in: pass value; != 0, if the lock will be + rw_lock_t* lock, /* in: pointer to rw-lock */ + ulint pass, /* in: pass value; != 0, if the lock will be passed to another thread to unlock */ char* file_name, /* in: file name where lock requested */ - ulint line /* in: line where requested */ - #endif -) + ulint line) /* in: line where requested */ { ut_ad(mutex_own(rw_lock_get_mutex(lock))); @@ -150,6 +144,9 @@ rw_lock_s_lock_low( line); #endif + lock->last_s_file_name = file_name; + lock->last_s_line = line; + return(TRUE); /* locking succeeded */ } @@ -164,12 +161,9 @@ UNIV_INLINE void rw_lock_s_lock_direct( /*==================*/ - rw_lock_t* lock /* in: pointer to rw-lock */ - #ifdef UNIV_SYNC_DEBUG - ,char* file_name, /* in: file name where lock requested */ - ulint line /* in: line where requested */ - #endif -) + rw_lock_t* lock, /* in: pointer to rw-lock */ + char* file_name,/* in: file name where lock requested */ + ulint line) /* in: line where requested */ { ut_ad(lock->writer == RW_LOCK_NOT_LOCKED); ut_ad(rw_lock_get_reader_count(lock) == 0); @@ -177,6 +171,9 @@ rw_lock_s_lock_direct( /* Set the shared lock by incrementing the reader count */ lock->reader_count++; + lock->last_s_file_name = file_name; + lock->last_s_line = line; + #ifdef UNIV_SYNC_DEBUG rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name, line); #endif @@ -190,12 +187,9 @@ UNIV_INLINE void rw_lock_x_lock_direct( /*==================*/ - rw_lock_t* lock /* in: pointer to rw-lock */ - #ifdef UNIV_SYNC_DEBUG - ,char* file_name, /* in: file name where lock requested */ - ulint line /* in: line where requested */ - #endif -) + rw_lock_t* lock, /* in: pointer to rw-lock */ + char* file_name, /* in: file name where lock requested */ + ulint line) /* in: line where requested */ { 
ut_ad(rw_lock_validate(lock)); ut_ad(rw_lock_get_reader_count(lock) == 0); @@ -206,6 +200,9 @@ rw_lock_x_lock_direct( lock->writer_count++; lock->pass = 0; + lock->last_x_file_name = file_name; + lock->last_x_line = line; + #ifdef UNIV_SYNC_DEBUG rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line); #endif @@ -221,14 +218,11 @@ UNIV_INLINE void rw_lock_s_lock_func( /*================*/ - rw_lock_t* lock /* in: pointer to rw-lock */ - #ifdef UNIV_SYNC_DEBUG - ,ulint pass, /* in: pass value; != 0, if the lock will + rw_lock_t* lock, /* in: pointer to rw-lock */ + ulint pass, /* in: pass value; != 0, if the lock will be passed to another thread to unlock */ char* file_name, /* in: file name where lock requested */ - ulint line /* in: line where requested */ - #endif -) + ulint line) /* in: line where requested */ { /* NOTE: As we do not know the thread ids for threads which have s-locked a latch, and s-lockers will be served only after waiting @@ -245,11 +239,7 @@ rw_lock_s_lock_func( mutex_enter(rw_lock_get_mutex(lock)); - if (TRUE == rw_lock_s_lock_low(lock - #ifdef UNIV_SYNC_DEBUG - ,pass, file_name, line - #endif - )) { + if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) { mutex_exit(rw_lock_get_mutex(lock)); return; /* Success */ @@ -257,11 +247,8 @@ rw_lock_s_lock_func( /* Did not succeed, try spin wait */ mutex_exit(rw_lock_get_mutex(lock)); - rw_lock_s_lock_spin(lock - #ifdef UNIV_SYNC_DEBUG - ,pass, file_name, line - #endif - ); + rw_lock_s_lock_spin(lock, pass, file_name, line); + return; } } @@ -275,12 +262,9 @@ ibool rw_lock_s_lock_func_nowait( /*=======================*/ /* out: TRUE if success */ - rw_lock_t* lock /* in: pointer to rw-lock */ - #ifdef UNIV_SYNC_DEBUG - ,char* file_name, /* in: file name where lock requested */ - ulint line /* in: line where requested */ - #endif -) + rw_lock_t* lock, /* in: pointer to rw-lock */ + char* file_name,/* in: file name where lock requested */ + ulint line) /* in: line where requested */ { 
ibool success = FALSE; @@ -294,6 +278,9 @@ rw_lock_s_lock_func_nowait( rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name, line); #endif + + lock->last_s_file_name = file_name; + lock->last_s_line = line; success = TRUE; } @@ -312,12 +299,9 @@ ibool rw_lock_x_lock_func_nowait( /*=======================*/ /* out: TRUE if success */ - rw_lock_t* lock /* in: pointer to rw-lock */ - #ifdef UNIV_SYNC_DEBUG - ,char* file_name, /* in: file name where lock requested */ - ulint line /* in: line where requested */ - #endif -) + rw_lock_t* lock, /* in: pointer to rw-lock */ + char* file_name, /* in: file name where lock requested */ + ulint line) /* in: line where requested */ { ibool success = FALSE; @@ -338,6 +322,9 @@ rw_lock_x_lock_func_nowait( rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line); #endif + lock->last_x_file_name = file_name; + lock->last_x_line = line; + success = TRUE; } diff --git a/innobase/include/sync0sync.h b/innobase/include/sync0sync.h index cb86b2b815c..6c3bff66e27 100644 --- a/innobase/include/sync0sync.h +++ b/innobase/include/sync0sync.h @@ -64,24 +64,15 @@ mutex_free( NOTE! The following macro should be used in mutex locking, not the corresponding function. */ -#ifdef UNIV_SYNC_DEBUG #define mutex_enter(M) mutex_enter_func((M), IB__FILE__, __LINE__) -#else -#define mutex_enter(M) mutex_enter_func(M) -#endif /****************************************************************** NOTE! The following macro should be used in mutex locking, not the corresponding function. */ /* NOTE! currently same as mutex_enter! */ -#ifdef UNIV_SYNC_DEBUG -#define mutex_enter_fast(M) mutex_enter_func((M), IB__FILE__, __LINE__) -#else -#define mutex_enter_fast(M) mutex_enter_func(M) -#endif - -#define mutex_enter_fast_func mutex_enter_func; +#define mutex_enter_fast(M) mutex_enter_func((M), IB__FILE__, __LINE__) +#define mutex_enter_fast_func mutex_enter_func; /********************************************************************** NOTE! 
Use the corresponding macro in the header file, not this function directly. Locks a mutex for the current thread. If the mutex is reserved @@ -91,12 +82,9 @@ UNIV_INLINE void mutex_enter_func( /*=============*/ - mutex_t* mutex /* in: pointer to mutex */ - #ifdef UNIV_SYNC_DEBUG - ,char* file_name, /* in: file name where locked */ - ulint line /* in: line where locked */ - #endif - ); + mutex_t* mutex, /* in: pointer to mutex */ + char* file_name, /* in: file name where locked */ + ulint line); /* in: line where locked */ /************************************************************************ Tries to lock the mutex for the current thread. If the lock is not acquired immediately, returns with return value 1. */ @@ -104,8 +92,11 @@ immediately, returns with return value 1. */ ulint mutex_enter_nowait( /*===============*/ - /* out: 0 if succeed, 1 if not */ - mutex_t* mutex); /* in: pointer to mutex */ + /* out: 0 if succeed, 1 if not */ + mutex_t* mutex, /* in: pointer to mutex */ + char* file_name, /* in: file name where mutex + requested */ + ulint line); /* in: line where requested */ /********************************************************************** Unlocks a mutex owned by the current thread. */ UNIV_INLINE @@ -373,6 +364,7 @@ Memory pool mutex */ #define SYNC_LEVEL_NONE 2000 /* default: level not defined */ #define SYNC_DICT 1000 #define SYNC_DICT_AUTOINC_MUTEX 999 +#define SYNC_FOREIGN_KEY_CHECK 998 #define SYNC_PURGE_IS_RUNNING 997 #define SYNC_DICT_HEADER 995 #define SYNC_IBUF_HEADER 914 @@ -418,6 +410,7 @@ Memory pool mutex */ #define SYNC_BUF_BLOCK 149 #define SYNC_DOUBLEWRITE 140 #define SYNC_ANY_LATCH 135 +#define SYNC_THR_LOCAL 133 #define SYNC_MEM_HASH 131 #define SYNC_MEM_POOL 130 @@ -429,8 +422,6 @@ Memory pool mutex */ #define RW_LOCK_WAIT_EX 353 #define SYNC_MUTEX 354 -#define MUTEX_CNAME_LEN 8 - /* NOTE! The structure appears here only for the compiler to know its size. Do not use its fields directly! 
The structure used in the spin lock implementation of a mutual exclusion semaphore. */ @@ -457,8 +448,7 @@ struct mutex_struct { locked */ ulint level; /* Debug version: level in the global latching order; default SYNC_LEVEL_NONE */ - char cfile_name[MUTEX_CNAME_LEN]; - /* File name where mutex created */ + char* cfile_name; /* File name where mutex created */ ulint cline; /* Line where created */ ulint magic_n; }; diff --git a/innobase/include/sync0sync.ic b/innobase/include/sync0sync.ic index f7b341cb386..9531377ce0b 100644 --- a/innobase/include/sync0sync.ic +++ b/innobase/include/sync0sync.ic @@ -22,13 +22,9 @@ for the mutex before suspending the thread. */ void mutex_spin_wait( /*============*/ - mutex_t* mutex /* in: pointer to mutex */ - - #ifdef UNIV_SYNC_DEBUG - ,char* file_name, /* in: file name where mutex requested */ - ulint line /* in: line where requested */ - #endif -); + mutex_t* mutex, /* in: pointer to mutex */ + char* file_name,/* in: file name where mutex requested */ + ulint line); /* in: line where requested */ /********************************************************************** Sets the debug information for a reserved mutex. */ @@ -209,6 +205,18 @@ mutex_exit( #endif mutex_reset_lock_word(mutex); + /* A problem: we assume that mutex_reset_lock word + is a memory barrier, that is when we read the waiters + field next, the read must be serialized in memory + after the reset. A speculative processor might + perform the read first, which could leave a waiting + thread hanging indefinitely. + + Our current solution call every 10 seconds + sync_arr_wake_threads_if_sema_free() + to wake up possible hanging threads if + they are missed in mutex_signal_object. 
*/ + if (mutex_get_waiters(mutex) != 0) { mutex_signal_object(mutex); @@ -227,12 +235,9 @@ UNIV_INLINE void mutex_enter_func( /*=============*/ - mutex_t* mutex /* in: pointer to mutex */ - #ifdef UNIV_SYNC_DEBUG - ,char* file_name, /* in: file name where locked */ - ulint line /* in: line where locked */ - #endif - ) + mutex_t* mutex, /* in: pointer to mutex */ + char* file_name,/* in: file name where locked */ + ulint line) /* in: line where locked */ { ut_ad(mutex_validate(mutex)); @@ -245,13 +250,11 @@ mutex_enter_func( mutex_set_debug_info(mutex, file_name, line); #endif + mutex->file_name = file_name; + mutex->line = line; + return; /* Succeeded! */ } - mutex_spin_wait(mutex - #ifdef UNIV_SYNC_DEBUG - ,file_name, - line - #endif - ); + mutex_spin_wait(mutex, file_name, line); } diff --git a/innobase/include/trx0rseg.ic b/innobase/include/trx0rseg.ic index aeb4466ff0f..423447d5566 100644 --- a/innobase/include/trx0rseg.ic +++ b/innobase/include/trx0rseg.ic @@ -61,7 +61,11 @@ trx_rsegf_get_nth_undo( ulint n, /* in: index of slot */ mtr_t* mtr) /* in: mtr */ { - ut_ad(n < TRX_RSEG_N_SLOTS); + if (n >= TRX_RSEG_N_SLOTS) { + fprintf(stderr, + "InnoDB: Error: trying to get slot %lu of rseg\n", n); + ut_a(0); + } return(mtr_read_ulint(rsegf + TRX_RSEG_UNDO_SLOTS + n * TRX_RSEG_SLOT_SIZE, MLOG_4BYTES, mtr)); @@ -78,7 +82,11 @@ trx_rsegf_set_nth_undo( ulint page_no,/* in: page number of the undo log segment */ mtr_t* mtr) /* in: mtr */ { - ut_ad(n < TRX_RSEG_N_SLOTS); + if (n >= TRX_RSEG_N_SLOTS) { + fprintf(stderr, + "InnoDB: Error: trying to set slot %lu of rseg\n", n); + ut_a(0); + } mlog_write_ulint(rsegf + TRX_RSEG_UNDO_SLOTS + n * TRX_RSEG_SLOT_SIZE, page_no, MLOG_4BYTES, mtr); diff --git a/innobase/include/trx0trx.h b/innobase/include/trx0trx.h index f179e20ad62..8db0b39d3b4 100644 --- a/innobase/include/trx0trx.h +++ b/innobase/include/trx0trx.h @@ -317,6 +317,19 @@ struct trx_struct{ ibool has_search_latch; /* TRUE if this trx has latched the search system 
latch in S-mode */ + ulint search_latch_timeout; + /* If we notice that someone is + waiting for our S-lock on the search + latch to be released, we wait in + row0sel.c for BTR_SEA_TIMEOUT new + searches until we try to keep + the search latch again over + calls from MySQL; this is intended + to reduce contention on the search + latch */ + lock_t* auto_inc_lock; /* possible auto-inc lock reserved by + the transaction; note that it is also + in the lock list trx_locks */ ibool ignore_duplicates_in_insert; /* in an insert roll back only insert of the latest row in case @@ -401,11 +414,9 @@ struct trx_struct{ checking algorithm */ /*------------------------------*/ mem_heap_t* lock_heap; /* memory heap for the locks of the - transaction; protected by - lock_heap_mutex */ + transaction */ UT_LIST_BASE_NODE_T(lock_t) - trx_locks; /* locks reserved by the transaction; - protected by lock_heap_mutex */ + trx_locks; /* locks reserved by the transaction */ /*------------------------------*/ mem_heap_t* read_view_heap; /* memory heap for the read view */ read_view_t* read_view; /* consistent read view or NULL */ diff --git a/innobase/include/ut0mem.h b/innobase/include/ut0mem.h index 8e5a4fda0d3..2d245e5f72f 100644 --- a/innobase/include/ut0mem.h +++ b/innobase/include/ut0mem.h @@ -13,6 +13,9 @@ Created 5/30/1994 Heikki Tuuri #include <string.h> #include <stdlib.h> +/* The total amount of memory currently allocated from the OS with malloc */ +extern ulint ut_total_allocated_memory; + UNIV_INLINE void* ut_memcpy(void* dest, void* sour, ulint n); diff --git a/innobase/lock/lock0lock.c b/innobase/lock/lock0lock.c index df35e22005f..fa0641bad73 100644 --- a/innobase/lock/lock0lock.c +++ b/innobase/lock/lock0lock.c @@ -578,6 +578,17 @@ lock_sys_create( } /************************************************************************* +Gets the size of a lock struct. 
*/ + +ulint +lock_get_size(void) +/*===============*/ + /* out: size in bytes */ +{ + return((ulint)sizeof(lock_t)); +} + +/************************************************************************* Gets the mode of a lock. */ UNIV_INLINE ulint @@ -709,13 +720,17 @@ lock_mode_stronger_or_eq( ulint mode2) /* in: lock mode */ { ut_ad(mode1 == LOCK_X || mode1 == LOCK_S || mode1 == LOCK_IX - || mode1 == LOCK_IS); + || mode1 == LOCK_IS || mode1 == LOCK_AUTO_INC); ut_ad(mode2 == LOCK_X || mode2 == LOCK_S || mode2 == LOCK_IX - || mode2 == LOCK_IS); + || mode2 == LOCK_IS || mode2 == LOCK_AUTO_INC); if (mode1 == LOCK_X) { return(TRUE); + } else if (mode1 == LOCK_AUTO_INC && mode2 == LOCK_AUTO_INC) { + + return(TRUE); + } else if (mode1 == LOCK_S && (mode2 == LOCK_S || mode2 == LOCK_IS)) { return(TRUE); @@ -743,9 +758,9 @@ lock_mode_compatible( ulint mode2) /* in: lock mode */ { ut_ad(mode1 == LOCK_X || mode1 == LOCK_S || mode1 == LOCK_IX - || mode1 == LOCK_IS); + || mode1 == LOCK_IS || mode1 == LOCK_AUTO_INC); ut_ad(mode2 == LOCK_X || mode2 == LOCK_S || mode2 == LOCK_IX - || mode2 == LOCK_IS); + || mode2 == LOCK_IS || mode2 == LOCK_AUTO_INC); if (mode1 == LOCK_S && (mode2 == LOCK_IS || mode2 == LOCK_S)) { @@ -755,12 +770,18 @@ lock_mode_compatible( return(FALSE); + } else if (mode1 == LOCK_AUTO_INC && (mode2 == LOCK_IS + || mode2 == LOCK_IX)) { + return(TRUE); + } else if (mode1 == LOCK_IS && (mode2 == LOCK_IS || mode2 == LOCK_IX + || mode2 == LOCK_AUTO_INC || mode2 == LOCK_S)) { return(TRUE); } else if (mode1 == LOCK_IX && (mode2 == LOCK_IS + || mode2 == LOCK_AUTO_INC || mode2 == LOCK_IX)) { return(TRUE); } @@ -1836,7 +1857,7 @@ lock_grant( Cancels a waiting record lock request and releases the waiting transaction that requested it. NOTE: does NOT check if waiting lock requests behind this one can now be granted! 
*/ - +static void lock_rec_cancel( /*============*/ @@ -2812,7 +2833,18 @@ lock_table_create( ut_ad(table && trx); ut_ad(mutex_own(&kernel_mutex)); - lock = mem_heap_alloc(trx->lock_heap, sizeof(lock_t)); + if (type_mode == LOCK_AUTO_INC) { + /* Only one trx can have the lock on the table + at a time: we may use the memory preallocated + to the table object */ + + lock = table->auto_inc_lock; + + ut_a(trx->auto_inc_lock == NULL); + trx->auto_inc_lock = lock; + } else { + lock = mem_heap_alloc(trx->lock_heap, sizeof(lock_t)); + } if (lock == NULL) { @@ -2854,6 +2886,10 @@ lock_table_remove_low( table = lock->un_member.tab_lock.table; trx = lock->trx; + if (lock == trx->auto_inc_lock) { + trx->auto_inc_lock = NULL; + } + UT_LIST_REMOVE(trx_locks, trx->trx_locks, lock); UT_LIST_REMOVE(un_member.tab_lock.locks, table->locks, lock); } @@ -2988,7 +3024,7 @@ lock_table( if (lock_table_other_has_incompatible(trx, LOCK_WAIT, table, mode)) { - /* Another trx has request on the table in an incompatible + /* Another trx has a request on the table in an incompatible mode: this trx must wait */ err = lock_table_enqueue_waiting(mode, table, thr); @@ -3102,6 +3138,24 @@ lock_table_dequeue( /*=========================== LOCK RELEASE ==============================*/ /************************************************************************* +Releases an auto-inc lock a transaction possibly has on a table. +Releases possible other transactions waiting for this lock. */ + +void +lock_table_unlock_auto_inc( +/*=======================*/ + trx_t* trx) /* in: transaction */ +{ + if (trx->auto_inc_lock) { + mutex_enter(&kernel_mutex); + + lock_table_dequeue(trx->auto_inc_lock); + + mutex_exit(&kernel_mutex); + } +} + +/************************************************************************* Releases transaction locks, and releases possible other transactions waiting because of these locks. 
*/ @@ -3147,6 +3201,37 @@ lock_release_off_kernel( } mem_heap_empty(trx->lock_heap); + + ut_a(trx->auto_inc_lock == NULL); +} + +/************************************************************************* +Cancels a waiting lock request and releases possible other transactions +waiting behind it. */ + +void +lock_cancel_waiting_and_release( +/*============================*/ + lock_t* lock) /* in: waiting lock request */ +{ + ut_ad(mutex_own(&kernel_mutex)); + + if (lock_get_type(lock) == LOCK_REC) { + + lock_rec_dequeue_from_page(lock); + } else { + ut_ad(lock_get_type(lock) == LOCK_TABLE); + + lock_table_dequeue(lock); + } + + /* Reset the wait flag and the back pointer to lock in trx */ + + lock_reset_lock_and_trx_wait(lock); + + /* The following function releases the trx from lock wait */ + + trx_end_lock_wait(lock->trx); } /************************************************************************* @@ -3237,8 +3322,10 @@ lock_table_print( printf(" lock_mode IS"); } else if (lock_get_mode(lock) == LOCK_IX) { printf(" lock_mode IX"); + } else if (lock_get_mode(lock) == LOCK_AUTO_INC) { + printf(" lock_mode AUTO-INC"); } else { - ut_error; + printf(" unknown lock_mode %lu", lock_get_mode(lock)); } if (lock_get_wait(lock)) { @@ -3304,10 +3391,7 @@ lock_rec_print( page = buf_page_get_gen(space, page_no, RW_NO_LATCH, NULL, BUF_GET_IF_IN_POOL, -#ifdef UNIV_SYNC_DEBUG - IB__FILE__, __LINE__, -#endif - &mtr); + IB__FILE__, __LINE__, &mtr); if (page) { page = buf_page_get_nowait(space, page_no, RW_S_LATCH, &mtr); } @@ -3417,6 +3501,11 @@ loop: trx = UT_LIST_GET_FIRST(trx_sys->trx_list); i = 0; + + /* Since we temporarily release the kernel mutex when + reading a database page in below, variable trx may be + obsolete now and we must loop through the trx list to + get probably the same trx, or some other trx. 
*/ while (trx && (i < nth_trx)) { trx = UT_LIST_GET_NEXT(trx_list, trx); @@ -3466,6 +3555,9 @@ loop: i = 0; + /* Look at the note about the trx loop above why we loop here: + lock may be an obsolete pointer now. */ + lock = UT_LIST_GET_FIRST(trx->trx_locks); while (lock && (i < nth_lock)) { diff --git a/innobase/log/log0log.c b/innobase/log/log0log.c index beac63535ab..3213866e8a7 100644 --- a/innobase/log/log0log.c +++ b/innobase/log/log0log.c @@ -569,9 +569,12 @@ log_init(void) ut_a(LOG_BUFFER_SIZE >= 4 * UNIV_PAGE_SIZE); buf = ut_malloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE); - log_sys->buf = ut_align(buf, OS_FILE_LOG_BLOCK_SIZE); + log_sys->buf = ut_align(buf, OS_FILE_LOG_BLOCK_SIZE); log_sys->buf_size = LOG_BUFFER_SIZE; + + memset(log_sys->buf, '\0', LOG_BUFFER_SIZE); + log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO - LOG_BUF_FLUSH_MARGIN; log_sys->check_flush_or_checkpoint = TRUE; @@ -579,6 +582,8 @@ log_init(void) log_sys->n_log_ios = 0; + log_sys->n_log_ios_old = log_sys->n_log_ios; + log_sys->last_printout_time = time(NULL); /*----------------------------*/ log_sys->buf_next_to_write = 0; @@ -609,6 +614,7 @@ log_init(void) log_sys->checkpoint_buf = ut_align( mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE), OS_FILE_LOG_BLOCK_SIZE); + memset(log_sys->checkpoint_buf, '\0', OS_FILE_LOG_BLOCK_SIZE); /*----------------------------*/ log_sys->archiving_state = LOG_ARCH_ON; @@ -626,6 +632,8 @@ log_init(void) OS_FILE_LOG_BLOCK_SIZE); log_sys->archive_buf_size = LOG_ARCHIVE_BUF_SIZE; + memset(log_sys->archive_buf, '\0', LOG_ARCHIVE_BUF_SIZE); + log_sys->archiving_on = os_event_create(NULL); /*----------------------------*/ @@ -2791,8 +2799,35 @@ void log_print(void) /*===========*/ { - printf("Log sequence number %lu %lu\n", - ut_dulint_get_high(log_sys->lsn), - ut_dulint_get_low(log_sys->lsn)); -} + double time_elapsed; + time_t current_time; + + mutex_enter(&(log_sys->mutex)); + printf("Log sequence number %lu %lu\n" + "Log flushed up to %lu %lu\n" + 
"Last checkpoint at %lu %lu\n", + ut_dulint_get_high(log_sys->lsn), + ut_dulint_get_low(log_sys->lsn), + ut_dulint_get_high(log_sys->written_to_some_lsn), + ut_dulint_get_low(log_sys->written_to_some_lsn), + ut_dulint_get_high(log_sys->last_checkpoint_lsn), + ut_dulint_get_low(log_sys->last_checkpoint_lsn)); + + current_time = time(NULL); + + time_elapsed = difftime(current_time, log_sys->last_printout_time); + + printf( + "%lu pending log writes, %lu pending chkp writes\n" + "%lu log i/o's done, %.2f log i/o's/second\n", + log_sys->n_pending_writes, + log_sys->n_pending_checkpoint_writes, + log_sys->n_log_ios, + (log_sys->n_log_ios - log_sys->n_log_ios_old) / time_elapsed); + + log_sys->n_log_ios_old = log_sys->n_log_ios; + log_sys->last_printout_time = current_time; + + mutex_exit(&(log_sys->mutex)); +} diff --git a/innobase/log/log0recv.c b/innobase/log/log0recv.c index edab98fa39c..eb3eadcede9 100644 --- a/innobase/log/log0recv.c +++ b/innobase/log/log0recv.c @@ -560,6 +560,7 @@ recv_parse_or_apply_log_rec_body( } else if (type <= MLOG_WRITE_STRING) { new_ptr = mlog_parse_string(ptr, end_ptr, page); } else { + new_ptr = NULL; /* Eliminate compiler warning */ ut_error; } @@ -801,9 +802,7 @@ recv_recover_page( mtr_set_log_mode(&mtr, MTR_LOG_NONE); success = buf_page_get_known_nowait(RW_X_LATCH, page, BUF_KEEP_OLD, -#ifdef UNIV_SYNC_DEBUG IB__FILE__, __LINE__, -#endif &mtr); ut_a(success); @@ -1212,9 +1211,7 @@ recv_compare_spaces( frame = buf_page_get_gen(space1, page_no, RW_S_LATCH, NULL, BUF_GET_IF_IN_POOL, -#ifdef UNIV_SYNC_DEBUG IB__FILE__, __LINE__, -#endif &mtr); if (frame) { buf_page_dbg_add_level(frame, SYNC_NO_ORDER_CHECK); @@ -1227,9 +1224,7 @@ recv_compare_spaces( frame = buf_page_get_gen(space2, page_no, RW_S_LATCH, NULL, BUF_GET_IF_IN_POOL, -#ifdef UNIV_SYNC_DEBUG IB__FILE__, __LINE__, -#endif &mtr); if (frame) { buf_page_dbg_add_level(frame, SYNC_NO_ORDER_CHECK); @@ -2033,8 +2028,11 @@ recv_recovery_from_checkpoint_start( while (group) { 
old_scanned_lsn = recv_sys->scanned_lsn; - recv_group_scan_log_recs(group, &contiguous_lsn, + if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) { + recv_group_scan_log_recs(group, &contiguous_lsn, &group_scanned_lsn); + } + group->scanned_lsn = group_scanned_lsn; if (ut_dulint_cmp(old_scanned_lsn, group_scanned_lsn) < 0) { @@ -2120,10 +2118,12 @@ recv_recovery_from_checkpoint_finish(void) { /* Rollback the uncommitted transactions which have no user session */ - trx_rollback_all_without_sess(); + if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) { + trx_rollback_all_without_sess(); + } /* Apply the hashed log records to the respective file pages */ - + recv_apply_hashed_log_recs(TRUE); if (log_debug_writes) { diff --git a/innobase/mem/mem0pool.c b/innobase/mem/mem0pool.c index 6c3a4adebae..48e7e686953 100644 --- a/innobase/mem/mem0pool.c +++ b/innobase/mem/mem0pool.c @@ -76,7 +76,7 @@ pool, and after that its locks will grow into the buffer pool. */ #define MEM_AREA_FREE 1 /* The smallest memory area total size */ -#define MEM_AREA_MIN_SIZE (2 * sizeof(struct mem_area_struct)) +#define MEM_AREA_MIN_SIZE (2 * MEM_AREA_EXTRA_SIZE) /* Data structure for a memory pool. The space is allocated using the buddy algorithm, where free list i contains areas of size 2 to power i. */ @@ -556,7 +556,7 @@ Returns the amount of reserved memory. 
*/ ulint mem_pool_get_reserved( /*==================*/ - /* out: reserved mmeory in bytes */ + /* out: reserved memory in bytes */ mem_pool_t* pool) /* in: memory pool */ { ulint reserved; diff --git a/innobase/mtr/mtr0log.c b/innobase/mtr/mtr0log.c index 11c0c476fcb..26f5a5d1cb7 100644 --- a/innobase/mtr/mtr0log.c +++ b/innobase/mtr/mtr0log.c @@ -54,6 +54,13 @@ mlog_write_initial_log_record( ut_ad(type <= MLOG_BIGGEST_TYPE); + if (ptr < buf_pool->frame_zero || ptr >= buf_pool->high_end) { + fprintf(stderr, + "InnoDB: Error: trying to write to a stray memory location %lx\n", + (ulint)ptr); + ut_a(0); + } + log_ptr = mlog_open(mtr, 20); /* If no logging is requested, we may return now */ @@ -184,6 +191,13 @@ mlog_write_ulint( { byte* log_ptr; + if (ptr < buf_pool->frame_zero || ptr >= buf_pool->high_end) { + fprintf(stderr, + "InnoDB: Error: trying to write to a stray memory location %lx\n", + (ulint)ptr); + ut_a(0); + } + if (type == MLOG_1BYTE) { mach_write_to_1(ptr, val); } else if (type == MLOG_2BYTES) { @@ -225,6 +239,13 @@ mlog_write_dulint( { byte* log_ptr; + if (ptr < buf_pool->frame_zero || ptr >= buf_pool->high_end) { + fprintf(stderr, + "InnoDB: Error: trying to write to a stray memory location %lx\n", + (ulint)ptr); + ut_a(0); + } + ut_ad(ptr && mtr); ut_ad(type == MLOG_8BYTES); @@ -262,6 +283,12 @@ mlog_write_string( { byte* log_ptr; + if (ptr < buf_pool->frame_zero || ptr >= buf_pool->high_end) { + fprintf(stderr, + "InnoDB: Error: trying to write to a stray memory location %lx\n", + (ulint)ptr); + ut_a(0); + } ut_ad(ptr && mtr); ut_ad(len < UNIV_PAGE_SIZE); diff --git a/innobase/os/os0file.c b/innobase/os/os0file.c index d4d30f6aabc..70813f2f7e1 100644 --- a/innobase/os/os0file.c +++ b/innobase/os/os0file.c @@ -11,6 +11,7 @@ Created 10/21/1995 Heikki Tuuri #include "ut0mem.h" #include "srv0srv.h" #include "trx0sys.h" +#include "fil0fil.h" #undef HAVE_FDATASYNC @@ -109,6 +110,14 @@ os_aio_array_t* os_aio_sync_array = NULL; ulint os_aio_n_segments = 
ULINT_UNDEFINED; +ulint os_n_file_reads = 0; +ulint os_n_file_writes = 0; +ulint os_n_fsyncs = 0; +ulint os_n_file_reads_old = 0; +ulint os_n_file_writes_old = 0; +ulint os_n_fsyncs_old = 0; +time_t os_last_printout; + /*************************************************************************** Gets the operating system version. Currently works only on Windows. */ @@ -118,26 +127,26 @@ os_get_os_version(void) /* out: OS_WIN95, OS_WIN31, OS_WINNT (2000 == NT) */ { #ifdef __WIN__ - OSVERSIONINFO os_info; - - os_info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); - - ut_a(GetVersionEx(&os_info)); - - if (os_info.dwPlatformId == VER_PLATFORM_WIN32s) { - return(OS_WIN31); - } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) { - return(OS_WIN95); - } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) { - return(OS_WINNT); - } else { - ut_error; - return(0); - } + OSVERSIONINFO os_info; + + os_info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); + + ut_a(GetVersionEx(&os_info)); + + if (os_info.dwPlatformId == VER_PLATFORM_WIN32s) { + return(OS_WIN31); + } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) { + return(OS_WIN95); + } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) { + return(OS_WINNT); + } else { + ut_error; + return(0); + } #else - ut_error; + ut_error; - return(0); + return(0); #endif } @@ -160,7 +169,7 @@ os_file_get_last_error(void) if (err != ERROR_FILE_EXISTS) { fprintf(stderr, - "InnoDB: operating system error number %li in a file operation.\n", + "InnoDB: Warning: operating system error number %li in a file operation.\n", (long) err); } @@ -178,7 +187,7 @@ os_file_get_last_error(void) if (err != EEXIST) { fprintf(stderr, - "InnoDB: operating system error number %i in a file operation.\n", + "InnoDB: Warning: operating system error number %i in a file operation.\n", errno); } @@ -231,8 +240,10 @@ os_file_handle_error( exit(1); } else if (err == OS_FILE_AIO_RESOURCES_RESERVED) { - return(TRUE); + + } else if (err == 
OS_FILE_ALREADY_EXISTS) { + return(FALSE); } else { fprintf(stderr, "InnoDB: Cannot continue operation.\n"); @@ -317,14 +328,10 @@ try_again: if (file == INVALID_HANDLE_VALUE) { *success = FALSE; - if (create_mode != OS_FILE_OPEN - && os_file_get_last_error() == OS_FILE_DISK_FULL) { - - retry = os_file_handle_error(file, name); + retry = os_file_handle_error(file, name); - if (retry) { - goto try_again; - } + if (retry) { + goto try_again; } } else { *success = TRUE; @@ -369,14 +376,10 @@ try_again: if (file == -1) { *success = FALSE; - if (create_mode != OS_FILE_OPEN - && errno == ENOSPC) { + retry = os_file_handle_error(file, name); - retry = os_file_handle_error(file, name); - - if (retry) { - goto try_again; - } + if (retry) { + goto try_again; } } else { *success = TRUE; @@ -407,6 +410,7 @@ os_file_close( return(TRUE); } + os_file_handle_error(file, NULL); return(FALSE); #else int ret; @@ -414,6 +418,7 @@ os_file_close( ret = close(file); if (ret == -1) { + os_file_handle_error(file, NULL); return(FALSE); } @@ -551,6 +556,8 @@ os_file_flush( return(TRUE); } + os_file_handle_error(file, NULL); + return(FALSE); #else int ret; @@ -560,6 +567,8 @@ os_file_flush( #else ret = fsync(file); #endif + os_n_fsyncs++; + if (ret == 0) { return(TRUE); } @@ -589,6 +598,8 @@ os_file_pread( { off_t offs = (off_t)offset; + os_n_file_reads++; + #ifdef HAVE_PREAD return(pread(file, buf, n, offs)); #else @@ -631,6 +642,8 @@ os_file_pwrite( ssize_t ret; off_t offs = (off_t)offset; + os_n_file_writes++; + #ifdef HAVE_PWRITE ret = pwrite(file, buf, n, offs); @@ -702,12 +715,13 @@ os_file_read( BOOL ret; DWORD len; DWORD ret2; - DWORD err; DWORD low; DWORD high; ibool retry; ulint i; + os_n_file_reads++; + try_again: ut_ad(file); ut_ad(buf); @@ -724,7 +738,6 @@ try_again: ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { - err = GetLastError(); os_mutex_exit(os_file_seek_mutexes[i]); @@ -738,8 +751,6 @@ try_again: if (ret && 
len == n) { return(TRUE); } - - err = GetLastError(); #else ibool retry; ssize_t ret; @@ -791,12 +802,12 @@ os_file_write( BOOL ret; DWORD len; DWORD ret2; - DWORD err; DWORD low; DWORD high; ibool retry; ulint i; + os_n_file_writes++; try_again: ut_ad(file); ut_ad(buf); @@ -813,7 +824,6 @@ try_again: ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { - err = GetLastError(); os_mutex_exit(os_file_seek_mutexes[i]); @@ -987,6 +997,8 @@ os_aio_init( os_aio_segment_wait_events[i] = os_event_create(NULL); } + os_last_printout = time(NULL); + #ifdef POSIX_ASYNC_IO /* Block aio signals from the current thread and its children: for this to work, the current thread must be the first created @@ -1461,6 +1473,7 @@ try_again: } else if (mode == OS_AIO_SYNC) { array = os_aio_sync_array; } else { + array = NULL; /* Eliminate compiler warning */ ut_error; } @@ -1469,6 +1482,7 @@ try_again: if (type == OS_FILE_READ) { if (os_aio_use_native_aio) { #ifdef WIN_ASYNC_IO + os_n_file_reads++; ret = ReadFile(file, buf, (DWORD)n, &len, &(slot->control)); #elif defined(POSIX_ASYNC_IO) @@ -1485,6 +1499,7 @@ try_again: } else if (type == OS_FILE_WRITE) { if (os_aio_use_native_aio) { #ifdef WIN_ASYNC_IO + os_n_file_writes++; ret = WriteFile(file, buf, (DWORD)n, &len, &(slot->control)); #elif defined(POSIX_ASYNC_IO) @@ -1583,7 +1598,6 @@ os_aio_windows_handle( ulint n; ulint i; ibool ret_val; - ulint err; BOOL ret; DWORD len; @@ -1635,7 +1649,8 @@ os_aio_windows_handle( ut_a(TRUE == os_file_flush(slot->file)); } } else { - err = GetLastError(); + os_file_get_last_error(); + ut_error; ret_val = FALSE; @@ -2032,6 +2047,8 @@ os_aio_print(void) os_aio_array_t* array; os_aio_slot_t* slot; ulint n_reserved; + time_t current_time; + double time_elapsed; ulint i; for (i = 0; i < srv_n_file_io_threads; i++) { @@ -2039,7 +2056,7 @@ os_aio_print(void) srv_io_thread_op_info[i]); } - printf("Pending normal aio reads: "); + printf("Pending normal aio 
reads:"); array = os_aio_read_array; loop: @@ -2066,12 +2083,12 @@ loop: ut_a(array->n_reserved == n_reserved); - printf("%lu\n", n_reserved); + printf(" %lu", n_reserved); os_mutex_exit(array->mutex); if (array == os_aio_read_array) { - printf("Pending aio writes: "); + printf(", aio writes:"); array = os_aio_write_array; @@ -2079,25 +2096,48 @@ loop: } if (array == os_aio_write_array) { - printf("Pending insert buffer aio reads: "); + printf(",\n ibuf aio reads:"); array = os_aio_ibuf_array; goto loop; } if (array == os_aio_ibuf_array) { - printf("Pending log writes or reads: "); + printf(", log i/o's:"); array = os_aio_log_array; goto loop; } if (array == os_aio_log_array) { - printf("Pending synchronous reads or writes: "); + printf(", sync i/o's:"); array = os_aio_sync_array; goto loop; } + + printf("\n"); + + current_time = time(NULL); + time_elapsed = difftime(current_time, os_last_printout); + + printf("Pending flushes (fsync) log: %lu; buffer pool: %lu\n", + fil_n_pending_log_flushes, fil_n_pending_tablespace_flushes); + printf("%lu OS file reads, %lu OS file writes, %lu OS fsyncs\n", + os_n_file_reads, os_n_file_writes, os_n_fsyncs); + printf("%.2f reads/s, %.2f writes/s, %.2f fsyncs/s\n", + (os_n_file_reads - os_n_file_reads_old) + / time_elapsed, + (os_n_file_writes - os_n_file_writes_old) + / time_elapsed, + (os_n_fsyncs - os_n_fsyncs_old) + / time_elapsed); + + os_n_file_reads_old = os_n_file_reads; + os_n_file_writes_old = os_n_file_writes; + os_n_fsyncs_old = os_n_fsyncs; + + os_last_printout = current_time; } /************************************************************************** diff --git a/innobase/page/page0page.c b/innobase/page/page0page.c index 511191ecd89..427064bc89c 100644 --- a/innobase/page/page0page.c +++ b/innobase/page/page0page.c @@ -1258,8 +1258,8 @@ page_validate( page_dir_get_nth_slot(page, n_slots - 1))) { fprintf(stderr, "Record heap and dir overlap on a page in index %s, %lu, %lu\n", - index->name, 
page_header_get_ptr(page, PAGE_HEAP_TOP), - page_dir_get_nth_slot(page, n_slots - 1)); + index->name, (ulint)page_header_get_ptr(page, PAGE_HEAP_TOP), + (ulint)page_dir_get_nth_slot(page, n_slots - 1)); goto func_exit; } diff --git a/innobase/pars/lexyy.c b/innobase/pars/lexyy.c index 64b8963028b..67bd12afa60 100644 --- a/innobase/pars/lexyy.c +++ b/innobase/pars/lexyy.c @@ -7362,7 +7362,7 @@ static void *yy_flex_alloc( size ) unsigned int size; #endif { - return (void *) malloc( size ); + return (void *) mem_alloc( size ); } #ifdef YY_USE_PROTOS @@ -7373,7 +7373,7 @@ void *ptr; unsigned int size; #endif { - return (void *) realloc( ptr, size ); + return (void *) mem_realloc( ptr, size ); } #ifdef YY_USE_PROTOS @@ -7383,6 +7383,6 @@ static void yy_flex_free( ptr ) void *ptr; #endif { - free( ptr ); + mem_free( ptr ); } diff --git a/innobase/pars/pars0grm.c b/innobase/pars/pars0grm.c index e06cba4e69d..1f631c69f41 100644 --- a/innobase/pars/pars0grm.c +++ b/innobase/pars/pars0grm.c @@ -97,11 +97,8 @@ que_node_t */ #define YYSTYPE que_node_t* #include "univ.i" -#undef alloca -#define alloca mem_alloc #include <math.h> #include "pars0pars.h" -#include "mem0mem.h" #include "que0types.h" #include "que0que.h" #include "row0sel.h" @@ -705,7 +702,7 @@ int yydebug; /* nonzero means print parse trace */ /* YYINITDEPTH indicates the initial size of the parser's stacks */ #ifndef YYINITDEPTH -#define YYINITDEPTH 200 +#define YYINITDEPTH 1000 #endif /* YYMAXDEPTH is the maximum size the stacks can grow to @@ -896,17 +893,22 @@ yynewstate: if (yystacksize >= YYMAXDEPTH) { yyerror("parser stack overflow"); + ut_a(0); return 2; } yystacksize *= 2; if (yystacksize > YYMAXDEPTH) yystacksize = YYMAXDEPTH; - yyss = (short *) alloca (yystacksize * sizeof (*yyssp)); + + ut_a(0); /* Prevent possible memory leaks through the following + mem_alloc's */ + + yyss = (short *) mem_alloc (yystacksize * sizeof (*yyssp)); __yy_memcpy ((char *)yyss, (char *)yyss1, size * sizeof (*yyssp)); - yyvs = 
(YYSTYPE *) alloca (yystacksize * sizeof (*yyvsp)); + yyvs = (YYSTYPE *) mem_alloc (yystacksize * sizeof (*yyvsp)); __yy_memcpy ((char *)yyvs, (char *)yyvs1, size * sizeof (*yyvsp)); #ifdef YYLSP_NEEDED - yyls = (YYLTYPE *) alloca (yystacksize * sizeof (*yylsp)); + yyls = (YYLTYPE *) mem_alloc (yystacksize * sizeof (*yylsp)); __yy_memcpy ((char *)yyls, (char *)yyls1, size * sizeof (*yylsp)); #endif #endif /* no yyoverflow */ @@ -1663,7 +1665,7 @@ yyerrlab: /* here on detecting error */ x < (sizeof(yytname) / sizeof(char *)); x++) if (yycheck[x + yyn] == x) size += strlen(yytname[x]) + 15, count++; - msg = (char *) malloc(size + 15); + msg = (char *) mem_alloc(size + 15); if (msg != 0) { strcpy(msg, "parse error"); @@ -1682,7 +1684,7 @@ yyerrlab: /* here on detecting error */ } } yyerror(msg); - free(msg); + mem_free(msg); } else yyerror ("parse error; also virtual memory exceeded"); diff --git a/innobase/pars/pars0opt.c b/innobase/pars/pars0opt.c index 5d187ad2faf..6f4957f96ee 100644 --- a/innobase/pars/pars0opt.c +++ b/innobase/pars/pars0opt.c @@ -543,6 +543,7 @@ opt_search_plan_for_table( /* Calculate goodness for each index of the table */ index = dict_table_get_first_index(table); + best_index = index; /* Eliminate compiler warning */ best_goodness = 0; while (index) { diff --git a/innobase/pars/pars0pars.c b/innobase/pars/pars0pars.c index 4a298426476..8ffbca579b8 100644 --- a/innobase/pars/pars0pars.c +++ b/innobase/pars/pars0pars.c @@ -922,7 +922,8 @@ pars_process_assign_list( changes_ord_field = UPD_NODE_NO_ORD_CHANGE; - if (row_upd_changes_some_index_ord_field(node->table, node->update)) { + if (row_upd_changes_some_index_ord_field_binary(node->table, + node->update)) { changes_ord_field = 0; } diff --git a/innobase/que/que0que.c b/innobase/que/que0que.c index ddf8c8ebc43..96e505f8b80 100644 --- a/innobase/que/que0que.c +++ b/innobase/que/que0que.c @@ -832,7 +832,7 @@ que_thr_dec_refer_count( sess_t* sess; ibool send_srv_msg = FALSE; ibool 
release_stored_proc = FALSE; - ulint msg_len; + ulint msg_len = 0; byte msg_buf[ODBC_DATAGRAM_SIZE]; ulint fork_type; ibool stopped; diff --git a/innobase/rem/rem0cmp.c b/innobase/rem/rem0cmp.c index cdf1f363946..c3687ebb0e0 100644 --- a/innobase/rem/rem0cmp.c +++ b/innobase/rem/rem0cmp.c @@ -12,6 +12,8 @@ Created 7/1/1994 Heikki Tuuri #include "rem0cmp.ic" #endif +#include "srv0srv.h" + /* ALPHABETICAL ORDER ================== @@ -68,6 +70,54 @@ innobase_mysql_cmp( unsigned int b_length); /* in: data field length, not UNIV_SQL_NULL */ +/************************************************************************* +Transforms the character code so that it is ordered appropriately for the +language. This is only used for the latin1 char set. MySQL does the +comparisons for other char sets. */ +UNIV_INLINE +ulint +cmp_collate( +/*========*/ + /* out: collation order position */ + dtype_t* type, /* in: type */ + ulint code) /* in: code of a character stored in database + record */ +{ + ut_ad((type->mtype == DATA_CHAR) || (type->mtype == DATA_VARCHAR)); + + return((ulint) srv_latin1_ordering[code]); +} + + +/***************************************************************** +Returns TRUE if two types are equal for comparison purposes. */ + +ibool +cmp_types_are_equal( +/*================*/ + /* out: TRUE if the types are considered + equal in comparisons */ + dtype_t* type1, /* in: type 1 */ + dtype_t* type2) /* in: type 2 */ +{ + if (type1->mtype != type2->mtype) { + + return(FALSE); + } + + if (type1->mtype == DATA_MYSQL + || type1->mtype == DATA_VARMYSQL) { + + if ((type1->prtype & ~DATA_NOT_NULL) + != (type2->prtype & ~DATA_NOT_NULL)) { + + return(FALSE); + } + } + + return(TRUE); +} + /***************************************************************** Innobase uses this function is to compare two data fields for which the data type is such that we must compare whole fields. 
*/ @@ -269,8 +319,8 @@ cmp_data_data_slow( } if (cur_type->mtype <= DATA_CHAR) { - data1_byte = dtype_collate(cur_type, data1_byte); - data2_byte = dtype_collate(cur_type, data2_byte); + data1_byte = cmp_collate(cur_type, data1_byte); + data2_byte = cmp_collate(cur_type, data2_byte); } if (data1_byte > data2_byte) { @@ -482,8 +532,8 @@ cmp_dtuple_rec_with_match( } if (cur_type->mtype <= DATA_CHAR) { - rec_byte = dtype_collate(cur_type, rec_byte); - dtuple_byte = dtype_collate(cur_type, + rec_byte = cmp_collate(cur_type, rec_byte); + dtuple_byte = cmp_collate(cur_type, dtuple_byte); } @@ -796,8 +846,8 @@ cmp_rec_rec_with_match( } if (cur_type->mtype <= DATA_CHAR) { - rec1_byte = dtype_collate(cur_type, rec1_byte); - rec2_byte = dtype_collate(cur_type, rec2_byte); + rec1_byte = cmp_collate(cur_type, rec1_byte); + rec2_byte = cmp_collate(cur_type, rec2_byte); } if (rec1_byte < rec2_byte) { diff --git a/innobase/row/row0ins.c b/innobase/row/row0ins.c index 8542dcae326..92cac5a55cf 100644 --- a/innobase/row/row0ins.c +++ b/innobase/row/row0ins.c @@ -207,16 +207,33 @@ row_ins_sec_index_entry_by_modify( /*==============================*/ /* out: DB_SUCCESS or error code */ btr_cur_t* cursor, /* in: B-tree cursor */ + dtuple_t* entry, /* in: index entry to insert */ que_thr_t* thr, /* in: query thread */ mtr_t* mtr) /* in: mtr */ { - ulint err; - - ut_ad(((cursor->index)->type & DICT_CLUSTERED) == 0); - ut_ad(rec_get_deleted_flag(btr_cur_get_rec(cursor))); + mem_heap_t* heap; + upd_t* update; + rec_t* rec; + ulint err; + + rec = btr_cur_get_rec(cursor); + + ut_ad((cursor->index->type & DICT_CLUSTERED) == 0); + ut_ad(rec_get_deleted_flag(rec)); - /* We just remove the delete mark from the secondary index record */ - err = btr_cur_del_mark_set_sec_rec(0, cursor, FALSE, thr, mtr); + /* We know that in the ordering entry and rec are identified. + But in their binary form there may be differences if there + are char fields in them. 
Therefore we have to calculate the + difference and do an update-in-place if necessary. */ + + heap = mem_heap_create(1024); + + update = row_upd_build_sec_rec_difference_binary(cursor->index, + entry, rec, heap); + + err = btr_cur_update_sec_rec_in_place(cursor, update, thr, mtr); + + mem_heap_free(heap); return(err); } @@ -262,7 +279,7 @@ row_ins_clust_index_entry_by_modify( /* Build an update vector containing all the fields to be modified; NOTE that this vector may contain also system columns! */ - update = row_upd_build_difference(cursor->index, entry, ext_vec, + update = row_upd_build_difference_binary(cursor->index, entry, ext_vec, n_ext_vec, rec, heap); if (mode == BTR_MODIFY_LEAF) { /* Try optimistic updating of the record, keeping changes @@ -348,6 +365,203 @@ row_ins_set_shared_rec_lock( } /******************************************************************* +Checks if foreign key constraint fails for an index entry. Sets shared locks +which lock either the success or the failure of the constraint. NOTE that +the caller must have a shared latch on dict_foreign_key_check_lock. 
*/ + +ulint +row_ins_check_foreign_constraint( +/*=============================*/ + /* out: DB_SUCCESS, DB_LOCK_WAIT, + DB_NO_REFERENCED_ROW, + or DB_ROW_IS_REFERENCED */ + ibool check_ref,/* in: TRUE If we want to check that + the referenced table is ok, FALSE if we + want to to check the foreign key table */ + dict_foreign_t* foreign,/* in: foreign constraint; NOTE that the + tables mentioned in it must be in the + dictionary cache if they exist at all */ + dict_table_t* table, /* in: if check_ref is TRUE, then the foreign + table, else the referenced table */ + dict_index_t* index, /* in: index in table */ + dtuple_t* entry, /* in: index entry for index */ + que_thr_t* thr) /* in: query thread */ +{ + dict_table_t* check_table; + dict_index_t* check_index; + ulint n_fields_cmp; + rec_t* rec; + btr_pcur_t pcur; + ibool moved; + int cmp; + ulint err; + mtr_t mtr; + + ut_ad(rw_lock_own(&dict_foreign_key_check_lock, RW_LOCK_SHARED)); + + if (check_ref) { + check_table = foreign->referenced_table; + check_index = foreign->referenced_index; + } else { + check_table = foreign->foreign_table; + check_index = foreign->foreign_index; + } + + if (check_table == NULL) { + if (check_ref) { + return(DB_NO_REFERENCED_ROW); + } + + return(DB_SUCCESS); + } + + ut_a(check_table && check_index); + + if (check_table != table) { + /* We already have a LOCK_IX on table, but not necessarily + on check_table */ + + err = lock_table(0, check_table, LOCK_IS, thr); + + if (err != DB_SUCCESS) { + + return(err); + } + } + + mtr_start(&mtr); + + /* Store old value on n_fields_cmp */ + + n_fields_cmp = dtuple_get_n_fields_cmp(entry); + + dtuple_set_n_fields_cmp(entry, foreign->n_fields); + + btr_pcur_open(check_index, entry, PAGE_CUR_GE, + BTR_SEARCH_LEAF, &pcur, &mtr); + + /* Scan index records and check if there is a matching record */ + + for (;;) { + rec = btr_pcur_get_rec(&pcur); + + if (rec == page_get_infimum_rec(buf_frame_align(rec))) { + + goto next_rec; + } + + /* Try to place a 
lock on the index record */ + + err = row_ins_set_shared_rec_lock(rec, check_index, thr); + + if (err != DB_SUCCESS) { + + break; + } + + if (rec == page_get_supremum_rec(buf_frame_align(rec))) { + + goto next_rec; + } + + cmp = cmp_dtuple_rec(entry, rec); + + if (cmp == 0) { + if (!rec_get_deleted_flag(rec)) { + /* Found a matching record */ + + if (check_ref) { + err = DB_SUCCESS; + } else { + err = DB_ROW_IS_REFERENCED; + } + + break; + } + } + + if (cmp < 0) { + if (check_ref) { + err = DB_NO_REFERENCED_ROW; + } else { + err = DB_SUCCESS; + } + + break; + } + + ut_a(cmp == 0); +next_rec: + moved = btr_pcur_move_to_next(&pcur, &mtr); + + if (!moved) { + if (check_ref) { + err = DB_NO_REFERENCED_ROW; + } else { + err = DB_SUCCESS; + } + + break; + } + } + + mtr_commit(&mtr); + + /* Restore old value */ + dtuple_set_n_fields_cmp(entry, n_fields_cmp); + + return(err); +} + +/******************************************************************* +Checks if foreign key constraints fail for an index entry. If index +is not mentioned in any constraint, this function does nothing, +Otherwise does searches to the indexes of referenced tables and +sets shared locks which lock either the success or the failure of +a constraint. 
*/ +static +ulint +row_ins_check_foreign_constraints( +/*==============================*/ + /* out: DB_SUCCESS, DB_LOCK_WAIT, or error + code */ + dict_table_t* table, /* in: table */ + dict_index_t* index, /* in: index */ + dtuple_t* entry, /* in: index entry for index */ + que_thr_t* thr) /* in: query thread */ +{ + dict_foreign_t* foreign; + ulint err; + + foreign = UT_LIST_GET_FIRST(table->foreign_list); + + while (foreign) { + if (foreign->foreign_index == index) { + + if (foreign->referenced_table == NULL) { + dict_table_get(foreign->referenced_table_name, + thr_get_trx(thr)); + } + + rw_lock_s_lock(&dict_foreign_key_check_lock); + + err = row_ins_check_foreign_constraint(TRUE, foreign, + table, index, entry, thr); + + rw_lock_s_unlock(&dict_foreign_key_check_lock); + + if (err != DB_SUCCESS) { + return(err); + } + } + + foreign = UT_LIST_GET_NEXT(foreign_list, foreign); + } + + return(DB_SUCCESS); +} + +/******************************************************************* Scans a unique non-clustered index at a given index entry to determine whether a uniqueness violation has occurred for the key value of the entry. Set shared locks on possible duplicate records. 
*/ @@ -365,7 +579,6 @@ row_ins_scan_sec_index_for_duplicate( ulint n_fields_cmp; rec_t* rec; btr_pcur_t pcur; - trx_t* trx = thr_get_trx(thr); ulint err = DB_SUCCESS; ibool moved; mtr_t mtr; @@ -414,7 +627,7 @@ row_ins_scan_sec_index_for_duplicate( err = DB_DUPLICATE_KEY; - trx->error_info = index; + thr_get_trx(thr)->error_info = index; break; } @@ -699,7 +912,7 @@ row_ins_index_entry_low( ext_vec, n_ext_vec, thr, &mtr); } else { - err = row_ins_sec_index_entry_by_modify(&cursor, + err = row_ins_sec_index_entry_by_modify(&cursor, entry, thr, &mtr); } @@ -765,6 +978,15 @@ row_ins_index_entry( { ulint err; + if (UT_LIST_GET_FIRST(index->table->foreign_list)) { + err = row_ins_check_foreign_constraints(index->table, index, + entry, thr); + if (err != DB_SUCCESS) { + + return(err); + } + } + /* Try first optimistic descent to the B-tree */ err = row_ins_index_entry_low(BTR_MODIFY_LEAF, index, entry, @@ -812,7 +1034,7 @@ row_ins_index_entry_set_vals( /*************************************************************** Inserts a single index entry to the table. 
*/ -UNIV_INLINE +static ulint row_ins_index_entry_step( /*=====================*/ diff --git a/innobase/row/row0mysql.c b/innobase/row/row0mysql.c index 373ee4ac4bd..13c0332dcef 100644 --- a/innobase/row/row0mysql.c +++ b/innobase/row/row0mysql.c @@ -21,6 +21,7 @@ Created 9/17/2000 Heikki Tuuri #include "pars0pars.h" #include "dict0dict.h" #include "dict0crea.h" +#include "dict0load.h" #include "trx0roll.h" #include "trx0purge.h" #include "lock0lock.h" @@ -151,7 +152,7 @@ row_mysql_handle_errors( during the function entry */ trx_t* trx, /* in: transaction */ que_thr_t* thr, /* in: query thread */ - trx_savept_t* savept) /* in: savepoint */ + trx_savept_t* savept) /* in: savepoint or NULL */ { ibool timeout_expired; ulint err; @@ -172,12 +173,16 @@ handle_new_error: } } else if (err == DB_TOO_BIG_RECORD) { /* MySQL will roll back the latest SQL statement */ + } else if (err == DB_ROW_IS_REFERENCED + || err == DB_NO_REFERENCED_ROW + || err == DB_CANNOT_ADD_CONSTRAINT) { + /* MySQL will roll back the latest SQL statement */ } else if (err == DB_LOCK_WAIT) { timeout_expired = srv_suspend_mysql_thread(thr); if (timeout_expired) { - trx->error_state = DB_DEADLOCK; + trx->error_state = DB_LOCK_WAIT_TIMEOUT; que_thr_stop_for_mysql(thr); @@ -188,9 +193,12 @@ handle_new_error: return(TRUE); - } else if (err == DB_DEADLOCK) { - /* MySQL will roll back the latest SQL statement */ + } else if (err == DB_DEADLOCK || err == DB_LOCK_WAIT_TIMEOUT) { + /* Roll back the whole transaction; this resolution was added + to version 3.23.43 */ + trx_general_rollback_for_mysql(trx, FALSE, NULL); + } else if (err == DB_OUT_OF_FILE_SPACE) { /* MySQL will roll back the latest SQL statement */ @@ -203,6 +211,7 @@ handle_new_error: exit(1); } else { + fprintf(stderr, "InnoDB: unknown error code %lu\n", err); ut_a(0); } @@ -440,7 +449,94 @@ row_update_statistics_if_needed( dict_update_statistics(prebuilt->table); } } + +/************************************************************************* 
+Unlocks an AUTO_INC type lock possibly reserved by trx. */ + +void +row_unlock_table_autoinc_for_mysql( +/*===============================*/ + trx_t* trx) /* in: transaction */ +{ + if (!trx->auto_inc_lock) { + + return; + } + + lock_table_unlock_auto_inc(trx); +} + +/************************************************************************* +Sets an AUTO_INC type lock on the table mentioned in prebuilt. The +AUTO_INC lock gives exclusive access to the auto-inc counter of the +table. The lock is reserved only for the duration of an SQL statement. +It is not compatible with another AUTO_INC or exclusive lock on the +table. */ + +int +row_lock_table_autoinc_for_mysql( +/*=============================*/ + /* out: error code or DB_SUCCESS */ + row_prebuilt_t* prebuilt) /* in: prebuilt struct in the MySQL + table handle */ +{ + trx_t* trx = prebuilt->trx; + ins_node_t* node = prebuilt->ins_node; + que_thr_t* thr; + ulint err; + ibool was_lock_wait; + + ut_ad(trx); + ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); + + trx->op_info = "setting auto-inc lock"; + + if (node == NULL) { + row_get_prebuilt_insert_row(prebuilt); + node = prebuilt->ins_node; + } + + /* We use the insert query graph as the dummy graph needed + in the lock module call */ + + thr = que_fork_get_first_thr(prebuilt->ins_graph); + + que_thr_move_to_run_state_for_mysql(thr, trx); + +run_again: + thr->run_node = node; + thr->prev_node = node; + + /* It may be that the current session has not yet started + its transaction, or it has been committed: */ + + trx_start_if_not_started(trx); + + err = lock_table(0, prebuilt->table, LOCK_AUTO_INC, thr); + + trx->error_state = err; + + if (err != DB_SUCCESS) { + que_thr_stop_for_mysql(thr); + + was_lock_wait = row_mysql_handle_errors(&err, trx, thr, NULL); + + if (was_lock_wait) { + goto run_again; + } + + trx->op_info = ""; + + return(err); + } + + que_thr_stop_for_mysql_no_error(thr, trx); + + trx->op_info = ""; + return((int) err); +} + 
/************************************************************************* Does an insert for MySQL. */ @@ -462,6 +558,17 @@ row_insert_for_mysql( ut_ad(trx); ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); + if (srv_created_new_raw || srv_force_recovery) { + fprintf(stderr, + "InnoDB: A new raw disk partition was initialized or\n" + "InnoDB: innodb_force_recovery is on: we do not allow\n" + "InnoDB: database modifications by the user. Shut down\n" + "InnoDB: mysqld and edit my.cnf so that newraw is replaced\n" + "InnoDB: with raw, and innodb_force_... is removed.\n"); + + return(DB_ERROR); + } + trx->op_info = "inserting"; if (node == NULL) { @@ -634,6 +741,17 @@ row_update_for_mysql( ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); UT_NOT_USED(mysql_rec); + if (srv_created_new_raw || srv_force_recovery) { + fprintf(stderr, + "InnoDB: A new raw disk partition was initialized or\n" + "InnoDB: innodb_force_recovery is on: we do not allow\n" + "InnoDB: database modifications by the user. Shut down\n" + "InnoDB: mysqld and edit my.cnf so that newraw is replaced\n" + "InnoDB: with raw, and innodb_force_... is removed.\n"); + + return(DB_ERROR); + } + trx->op_info = "updating or deleting"; node = prebuilt->upd_node; @@ -816,8 +934,69 @@ row_create_table_for_mysql( ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); + if (srv_created_new_raw || srv_force_recovery) { + fprintf(stderr, + "InnoDB: A new raw disk partition was initialized or\n" + "InnoDB: innodb_force_recovery is on: we do not allow\n" + "InnoDB: database modifications by the user. Shut down\n" + "InnoDB: mysqld and edit my.cnf so that newraw is replaced\n" + "InnoDB: with raw, and innodb_force_... 
is removed.\n"); + + return(DB_ERROR); + } + trx->op_info = "creating table"; + namelen = ut_strlen(table->name); + + keywordlen = ut_strlen("innodb_monitor"); + + if (namelen >= keywordlen + && 0 == ut_memcmp(table->name + namelen - keywordlen, + "innodb_monitor", keywordlen)) { + + /* Table name ends to characters innodb_monitor: + start monitor prints */ + + srv_print_innodb_monitor = TRUE; + + /* The lock timeout monitor thread also takes care + of InnoDB monitor prints */ + + os_event_set(srv_lock_timeout_thread_event); + } + + keywordlen = ut_strlen("innodb_lock_monitor"); + + if (namelen >= keywordlen + && 0 == ut_memcmp(table->name + namelen - keywordlen, + "innodb_lock_monitor", keywordlen)) { + + srv_print_innodb_monitor = TRUE; + srv_print_innodb_lock_monitor = TRUE; + os_event_set(srv_lock_timeout_thread_event); + } + + keywordlen = ut_strlen("innodb_tablespace_monitor"); + + if (namelen >= keywordlen + && 0 == ut_memcmp(table->name + namelen - keywordlen, + "innodb_tablespace_monitor", keywordlen)) { + + srv_print_innodb_tablespace_monitor = TRUE; + os_event_set(srv_lock_timeout_thread_event); + } + + keywordlen = ut_strlen("innodb_table_monitor"); + + if (namelen >= keywordlen + && 0 == ut_memcmp(table->name + namelen - keywordlen, + "innodb_table_monitor", keywordlen)) { + + srv_print_innodb_table_monitor = TRUE; + os_event_set(srv_lock_timeout_thread_event); + } + /* Serialize data dictionary operations with dictionary mutex: no deadlocks can occur then in these operations */ @@ -845,9 +1024,12 @@ row_create_table_for_mysql( trx_general_rollback_for_mysql(trx, FALSE, NULL); if (err == DB_OUT_OF_FILE_SPACE) { + fprintf(stderr, + "InnoDB: Warning: cannot create table %s because tablespace full\n", + table->name); row_drop_table_for_mysql(table->name, trx, TRUE); } else { - assert(err == DB_DUPLICATE_KEY); + ut_a(err == DB_DUPLICATE_KEY); fprintf(stderr, "InnoDB: Error: table %s already exists in InnoDB internal\n" "InnoDB: data dictionary. 
Have you deleted the .frm file\n" @@ -864,39 +1046,6 @@ row_create_table_for_mysql( } trx->error_state = DB_SUCCESS; - } else { - namelen = ut_strlen(table->name); - - keywordlen = ut_strlen("innodb_monitor"); - - if (namelen >= keywordlen - && 0 == ut_memcmp(table->name + namelen - keywordlen, - "innodb_monitor", keywordlen)) { - - /* Table name ends to characters innodb_monitor: - start monitor prints */ - - srv_print_innodb_monitor = TRUE; - } - - keywordlen = ut_strlen("innodb_lock_monitor"); - - if (namelen >= keywordlen - && 0 == ut_memcmp(table->name + namelen - keywordlen, - "innodb_lock_monitor", keywordlen)) { - - srv_print_innodb_monitor = TRUE; - srv_print_innodb_lock_monitor = TRUE; - } - - keywordlen = ut_strlen("innodb_tablespace_monitor"); - - if (namelen >= keywordlen - && 0 == ut_memcmp(table->name + namelen - keywordlen, - "innodb_tablespace_monitor", keywordlen)) { - - srv_print_innodb_tablespace_monitor = TRUE; - } } mutex_exit(&(dict_sys->mutex)); @@ -970,6 +1119,65 @@ row_create_index_for_mysql( } /************************************************************************* +Scans a table create SQL string and adds to the data dictionary +the foreign key constraints declared in the string. This function +should be called after the indexes for a table have been created. +Each foreign key constraint must be accompanied with indexes in +bot participating tables. The indexes are allowed to contain more +fields than mentioned in the constraint. Check also that foreign key +constraints which reference this table are ok. 
*/ + +int +row_table_add_foreign_constraints( +/*==============================*/ + /* out: error code or DB_SUCCESS */ + trx_t* trx, /* in: transaction */ + char* sql_string, /* in: table create statement where + foreign keys are declared like: + FOREIGN KEY (a, b) REFERENCES table2(c, d), + table2 can be written also with the database + name before it: test.table2 */ + char* name) /* in: table full name in the normalized form + database_name/table_name */ +{ + ulint err; + + ut_a(sql_string); + + trx->op_info = "adding foreign keys"; + + /* Serialize data dictionary operations with dictionary mutex: + no deadlocks can occur then in these operations */ + + mutex_enter(&(dict_sys->mutex)); + + trx->dict_operation = TRUE; + + err = dict_create_foreign_constraints(trx, sql_string, name); + + if (err == DB_SUCCESS) { + /* Check that also referencing constraints are ok */ + err = dict_load_foreigns(name); + } + + if (err != DB_SUCCESS) { + /* We have special error handling here */ + + trx->error_state = DB_SUCCESS; + + trx_general_rollback_for_mysql(trx, FALSE, NULL); + + row_drop_table_for_mysql(name, trx, TRUE); + + trx->error_state = DB_SUCCESS; + } + + mutex_exit(&(dict_sys->mutex)); + + return((int) err); +} + +/************************************************************************* Drops a table for MySQL. If the name of the dropped table ends to characters INNODB_MONITOR, then this also stops printing of monitor output by the master thread. */ @@ -997,6 +1205,17 @@ row_drop_table_for_mysql( ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); ut_a(name != NULL); + if (srv_created_new_raw || srv_force_recovery) { + fprintf(stderr, + "InnoDB: A new raw disk partition was initialized or\n" + "InnoDB: innodb_force_recovery is on: we do not allow\n" + "InnoDB: database modifications by the user. Shut down\n" + "InnoDB: mysqld and edit my.cnf so that newraw is replaced\n" + "InnoDB: with raw, and innodb_force_... 
is removed.\n"); + + return(DB_ERROR); + } + trx->op_info = "dropping table"; namelen = ut_strlen(name); @@ -1032,6 +1251,15 @@ row_drop_table_for_mysql( srv_print_innodb_tablespace_monitor = FALSE; } + keywordlen = ut_strlen("innodb_table_monitor"); + + if (namelen >= keywordlen + && 0 == ut_memcmp(name + namelen - keywordlen, + "innodb_table_monitor", keywordlen)) { + + srv_print_innodb_table_monitor = FALSE; + } + /* We use the private SQL parser of Innobase to generate the query graphs needed in deleting the dictionary data from system tables in Innobase. Deleting a row from SYS_INDEXES table also @@ -1039,21 +1267,49 @@ row_drop_table_for_mysql( str1 = "PROCEDURE DROP_TABLE_PROC () IS\n" + "table_name CHAR;\n" + "sys_foreign_id CHAR;\n" "table_id CHAR;\n" "index_id CHAR;\n" + "foreign_id CHAR;\n" "found INT;\n" "BEGIN\n" - "SELECT ID INTO table_id\n" - "FROM SYS_TABLES\n" - "WHERE NAME ='"; - + "table_name := '"; + str2 = "';\n" + "SELECT ID INTO table_id\n" + "FROM SYS_TABLES\n" + "WHERE NAME = table_name;\n" "IF (SQL % NOTFOUND) THEN\n" " COMMIT WORK;\n" " RETURN;\n" "END IF;\n" "found := 1;\n" + "SELECT ID INTO sys_foreign_id\n" + "FROM SYS_TABLES\n" + "WHERE NAME = 'SYS_FOREIGN';\n" + "IF (SQL % NOTFOUND) THEN\n" + " found := 0;\n" + "END IF;\n" + "IF (table_name = 'SYS_FOREIGN') THEN\n" + " found := 0;\n" + "END IF;\n" + "IF (table_name = 'SYS_FOREIGN_COLS') THEN\n" + " found := 0;\n" + "END IF;\n" + "WHILE found = 1 LOOP\n" + " SELECT ID INTO foreign_id\n" + " FROM SYS_FOREIGN\n" + " WHERE FOR_NAME = table_name;\n" + " IF (SQL % NOTFOUND) THEN\n" + " found := 0;\n" + " ELSE" + " DELETE FROM SYS_FOREIGN_COLS WHERE ID = foreign_id;\n" + " DELETE FROM SYS_FOREIGN WHERE ID = foreign_id;\n" + " END IF;\n" + "END LOOP;\n" + "found := 1;\n" "WHILE found = 1 LOOP\n" " SELECT ID INTO index_id\n" " FROM SYS_INDEXES\n" @@ -1095,6 +1351,9 @@ row_drop_table_for_mysql( graph->fork_type = QUE_FORK_MYSQL_INTERFACE; + /* Prevent foreign key checks while we are dropping 
the table */ + rw_lock_x_lock(&(dict_foreign_key_check_lock)); + /* Prevent purge from running while we are dropping the table */ rw_lock_s_lock(&(purge_sys->purge_is_running)); @@ -1103,6 +1362,12 @@ row_drop_table_for_mysql( if (!table) { err = DB_TABLE_NOT_FOUND; + fprintf(stderr, + "InnoDB: Error: table %s does not exist in the InnoDB internal\n" + "InnoDB: data dictionary though MySQL is trying to drop it.\n" + "InnoDB: Have you copied the .frm file of the table to the\n" + "InnoDB: MySQL database directory from another database?\n", + name); goto funct_exit; } @@ -1138,6 +1403,8 @@ row_drop_table_for_mysql( funct_exit: rw_lock_s_unlock(&(purge_sys->purge_is_running)); + rw_lock_x_unlock(&(dict_foreign_key_check_lock)); + if (!has_dict_mutex) { mutex_exit(&(dict_sys->mutex)); } @@ -1150,6 +1417,49 @@ funct_exit: } /************************************************************************* +Drops a database for MySQL. */ + +int +row_drop_database_for_mysql( +/*========================*/ + /* out: error code or DB_SUCCESS */ + char* name, /* in: database name which ends to '/' */ + trx_t* trx) /* in: transaction handle */ +{ + char* table_name; + int err = DB_SUCCESS; + + ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); + ut_a(name != NULL); + ut_a(name[strlen(name) - 1] == '/'); + + trx->op_info = "dropping database"; + + mutex_enter(&(dict_sys->mutex)); + + while (table_name = dict_get_first_table_name_in_db(name)) { + ut_a(memcmp(table_name, name, strlen(name)) == 0); + + err = row_drop_table_for_mysql(table_name, trx, TRUE); + + mem_free(table_name); + + if (err != DB_SUCCESS) { + fprintf(stderr, + "InnoDB: DROP DATABASE %s failed with error %lu for table %s\n", + name, (ulint)err, table_name); + break; + } + } + + mutex_exit(&(dict_sys->mutex)); + + trx->op_info = ""; + + return(err); +} + +/************************************************************************* Renames a table for MySQL. 
*/ int @@ -1174,18 +1484,37 @@ row_rename_table_for_mysql( ut_a(old_name != NULL); ut_a(new_name != NULL); + if (srv_created_new_raw || srv_force_recovery) { + fprintf(stderr, + "InnoDB: A new raw disk partition was initialized or\n" + "InnoDB: innodb_force_recovery is on: we do not allow\n" + "InnoDB: database modifications by the user. Shut down\n" + "InnoDB: mysqld and edit my.cnf so that newraw is replaced\n" + "InnoDB: with raw, and innodb_force_... is removed.\n"); + + return(DB_ERROR); + } + trx->op_info = "renaming table"; str1 = "PROCEDURE RENAME_TABLE_PROC () IS\n" + "new_table_name CHAR;\n" + "old_table_name CHAR;\n" "BEGIN\n" - "UPDATE SYS_TABLES SET NAME ='"; + "new_table_name :='"; str2 = - "' WHERE NAME = '"; + "';\nold_table_name := '"; str3 = "';\n" + "UPDATE SYS_TABLES SET NAME = new_table_name\n" + "WHERE NAME = old_table_name;\n" + "UPDATE SYS_FOREIGN SET FOR_NAME = new_table_name\n" + "WHERE FOR_NAME = old_table_name;\n" + "UPDATE SYS_FOREIGN SET REF_NAME = new_table_name\n" + "WHERE REF_NAME = old_table_name;\n" "COMMIT WORK;\n" "END;\n"; @@ -1356,7 +1685,7 @@ row_check_table_for_mysql( dict_table_t* table = prebuilt->table; dict_index_t* index; ulint n_rows; - ulint n_rows_in_table; + ulint n_rows_in_table = ULINT_UNDEFINED; ulint ret = DB_SUCCESS; prebuilt->trx->op_info = "checking table"; diff --git a/innobase/row/row0purge.c b/innobase/row/row0purge.c index 43bc166347a..0dffa273938 100644 --- a/innobase/row/row0purge.c +++ b/innobase/row/row0purge.c @@ -220,7 +220,7 @@ row_purge_remove_sec_if_poss_low( if (!found) { /* Not found */ - /* FIXME: printf("PURGE:........sec entry not found\n"); */ + /* printf("PURGE:........sec entry not found\n"); */ /* dtuple_print(entry); */ btr_pcur_close(&pcur); @@ -382,7 +382,7 @@ row_purge_upd_exist_or_extern( while (node->index != NULL) { index = node->index; - if (row_upd_changes_ord_field(NULL, node->index, + if (row_upd_changes_ord_field_binary(NULL, node->index, node->update)) { /* Build the older 
version of the index entry */ entry = row_build_index_entry(node->row, index, heap); diff --git a/innobase/row/row0sel.c b/innobase/row/row0sel.c index d041e34a558..e42486f1e17 100644 --- a/innobase/row/row0sel.c +++ b/innobase/row/row0sel.c @@ -50,15 +50,21 @@ to que_run_threads: this is to allow canceling runaway queries */ /************************************************************************ Returns TRUE if the user-defined column values in a secondary index record -are the same as the corresponding columns in the clustered index record. */ +are the same as the corresponding columns in the clustered index record. +NOTE: the comparison is NOT done as a binary comparison, but character +fields are compared with collation! */ static ibool row_sel_sec_rec_is_for_clust_rec( /*=============================*/ - rec_t* sec_rec, - dict_index_t* sec_index, - rec_t* clust_rec, - dict_index_t* clust_index) + /* out: TRUE if the secondary + record is equal to the corresponding + fields in the clustered record, + when compared with collation */ + rec_t* sec_rec, /* in: secondary index record */ + dict_index_t* sec_index, /* in: secondary index */ + rec_t* clust_rec, /* in: clustered index record */ + dict_index_t* clust_index) /* in: clustered index */ { dict_col_t* col; byte* sec_field; @@ -84,9 +90,9 @@ row_sel_sec_rec_is_for_clust_rec( return(FALSE); } - if (sec_len != UNIV_SQL_NULL - && ut_memcmp(sec_field, clust_field, sec_len) != 0) { - + if (0 != cmp_data_data(dict_col_get_type(col), + clust_field, clust_len, + sec_field, sec_len)) { return(FALSE); } } @@ -763,7 +769,7 @@ row_sel_open_pcur( /************************************************************************* Restores a stored pcur position to a table index. 
*/ -UNIV_INLINE +static ibool row_sel_restore_pcur_pos( /*=====================*/ @@ -813,7 +819,8 @@ row_sel_restore_pcur_pos( return(TRUE); } - ut_ad(relative_position == BTR_PCUR_AFTER); + ut_ad(relative_position == BTR_PCUR_AFTER + || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE); return(FALSE); } @@ -835,7 +842,8 @@ row_sel_restore_pcur_pos( plan->stored_cursor_rec_processed is TRUE, we must move to the previous record, else there is no need to move the cursor. */ - if (relative_position == BTR_PCUR_BEFORE) { + if (relative_position == BTR_PCUR_BEFORE + || relative_position == BTR_PCUR_BEFORE_FIRST_IN_TREE) { return(FALSE); } @@ -850,7 +858,8 @@ row_sel_restore_pcur_pos( return(FALSE); } - ut_ad(relative_position == BTR_PCUR_AFTER); + ut_ad(relative_position == BTR_PCUR_AFTER + || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE); return(TRUE); } @@ -1762,7 +1771,7 @@ fetch_step( if (sel_node->state == SEL_NODE_CLOSED) { /* SQL error detected */ - printf("SQL error %lu\n", DB_ERROR); + printf("SQL error %lu\n", (ulint)DB_ERROR); que_thr_handle_error(thr, DB_ERROR, NULL, 0); @@ -2251,7 +2260,7 @@ row_sel_get_clust_rec_for_mysql( /************************************************************************ Restores cursor position after it has been stored. We have to take into -account that the record cursor was positioned on can have been deleted. +account that the record cursor was positioned on may have been deleted. Then we may have to move the cursor one step up or down. 
*/ static ibool @@ -2284,14 +2293,14 @@ sel_restore_position_for_mysql( if (moves_up) { btr_pcur_move_to_next(pcur, mtr); - - return(TRUE); } return(TRUE); } - if (relative_position == BTR_PCUR_AFTER) { + if (relative_position == BTR_PCUR_AFTER + || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE) { + if (moves_up) { return(TRUE); } @@ -2303,7 +2312,8 @@ sel_restore_position_for_mysql( return(TRUE); } - ut_ad(relative_position == BTR_PCUR_BEFORE); + ut_ad(relative_position == BTR_PCUR_BEFORE + || relative_position == BTR_PCUR_BEFORE_FIRST_IN_TREE); if (moves_up && btr_pcur_is_on_user_rec(pcur, mtr)) { btr_pcur_move_to_next(pcur, mtr); @@ -2586,21 +2596,30 @@ row_search_for_mysql( let us try a search shortcut through the hash index */ + if (btr_search_latch.writer != RW_LOCK_NOT_LOCKED) { + /* There is an x-latch request: release + a possible s-latch to reduce starvation + and wait for BTR_SEA_TIMEOUT rounds before + trying to keep it again over calls from + MySQL */ + + if (trx->has_search_latch) { + rw_lock_s_unlock(&btr_search_latch); + trx->has_search_latch = FALSE; + } + + trx->search_latch_timeout = BTR_SEA_TIMEOUT; + + goto no_shortcut; + } + if (!trx->has_search_latch) { rw_lock_s_lock(&btr_search_latch); trx->has_search_latch = TRUE; - - } else if (btr_search_latch.writer_is_wait_ex) { - /* There is an x-latch request waiting: - release the s-latch for a moment to reduce - starvation */ - - rw_lock_s_unlock(&btr_search_latch); - rw_lock_s_lock(&btr_search_latch); } shortcut = row_sel_try_search_shortcut_for_mysql(&rec, - prebuilt, &mtr); + prebuilt, &mtr); if (shortcut == SEL_FOUND) { row_sel_store_mysql_rec(buf, prebuilt, rec); @@ -2609,7 +2628,16 @@ row_search_for_mysql( /* printf("%s shortcut\n", index->name); */ srv_n_rows_read++; + + if (trx->search_latch_timeout > 0 + && trx->has_search_latch) { + trx->search_latch_timeout--; + + rw_lock_s_unlock(&btr_search_latch); + trx->has_search_latch = FALSE; + } + trx->op_info = ""; return(DB_SUCCESS); @@ 
-2619,6 +2647,16 @@ row_search_for_mysql( /* printf("%s record not found 2\n", index->name); */ + + if (trx->search_latch_timeout > 0 + && trx->has_search_latch) { + + trx->search_latch_timeout--; + + rw_lock_s_unlock(&btr_search_latch); + trx->has_search_latch = FALSE; + } + trx->op_info = ""; return(DB_RECORD_NOT_FOUND); } @@ -2627,7 +2665,7 @@ row_search_for_mysql( mtr_start(&mtr); } } - +no_shortcut: if (trx->has_search_latch) { rw_lock_s_unlock(&btr_search_latch); trx->has_search_latch = FALSE; diff --git a/innobase/row/row0umod.c b/innobase/row/row0umod.c index a7c8957d61a..37f5b1f0bc1 100644 --- a/innobase/row/row0umod.c +++ b/innobase/row/row0umod.c @@ -443,6 +443,8 @@ row_undo_mod_del_unmark_sec( "InnoDB: Make a detailed bug report and send it\n"); fprintf(stderr, "InnoDB: to mysql@lists.mysql.com\n"); + trx_print(thr_get_trx(thr)); + mem_free(err_buf); } else { btr_cur = btr_pcur_get_btr_cur(&pcur); @@ -552,7 +554,7 @@ row_undo_mod_upd_exist_sec( while (node->index != NULL) { index = node->index; - if (row_upd_changes_ord_field(node->row, node->index, + if (row_upd_changes_ord_field_binary(node->row, node->index, node->update)) { /* Build the newest version of the index entry */ diff --git a/innobase/row/row0upd.c b/innobase/row/row0upd.c index 3fa98db3a02..fa859729141 100644 --- a/innobase/row/row0upd.c +++ b/innobase/row/row0upd.c @@ -72,6 +72,134 @@ searched delete is obviously to keep the x-latch for several steps of query graph execution. */ /************************************************************************* +Checks if index currently is mentioned as a referenced index in a foreign +key constraint. This function also loads into the dictionary cache the +possible referencing table. 
*/ +static +ibool +row_upd_index_is_referenced( +/*========================*/ + /* out: TRUE if referenced; NOTE that since + we do not hold dict_foreign_key_check_lock + when leaving the function, it may be that + the referencing table has been dropped when + we leave this function: this function is only + for heuristic use! */ + dict_index_t* index) /* in: index */ +{ + dict_table_t* table = index->table; + dict_foreign_t* foreign; + ulint phase = 1; + +try_again: + if (!UT_LIST_GET_FIRST(table->referenced_list)) { + + return(FALSE); + } + + if (phase == 2) { + mutex_enter(&(dict_sys->mutex)); + } + + rw_lock_s_lock(&dict_foreign_key_check_lock); + + foreign = UT_LIST_GET_FIRST(table->referenced_list); + + while (foreign) { + if (foreign->referenced_index == index) { + if (foreign->foreign_table == NULL) { + if (phase == 2) { + dict_table_get_low(foreign-> + foreign_table_name); + } else { + phase = 2; + rw_lock_s_unlock( + &dict_foreign_key_check_lock); + goto try_again; + } + } + + rw_lock_s_unlock(&dict_foreign_key_check_lock); + + if (phase == 2) { + mutex_exit(&(dict_sys->mutex)); + } + + return(TRUE); + } + + foreign = UT_LIST_GET_NEXT(referenced_list, foreign); + } + + rw_lock_s_unlock(&dict_foreign_key_check_lock); + + if (phase == 2) { + mutex_exit(&(dict_sys->mutex)); + } + + return(FALSE); +} + +/************************************************************************* +Checks if possible foreign key constraints hold after a delete of the record +under pcur. NOTE that this function will temporarily commit mtr and lose +pcur position! */ +static +ulint +row_upd_check_references_constraints( +/*=================================*/ + /* out: DB_SUCCESS, DB_LOCK_WAIT, or an error + code */ + btr_pcur_t* pcur, /* in: cursor positioned on a record; NOTE: the + cursor position is lost in this function! 
*/ + dict_table_t* table, /* in: table in question */ + dict_index_t* index, /* in: index of the cursor */ + que_thr_t* thr, /* in: query thread */ + mtr_t* mtr) /* in: mtr */ +{ + dict_foreign_t* foreign; + mem_heap_t* heap; + dtuple_t* entry; + rec_t* rec; + ulint err; + + rec = btr_pcur_get_rec(pcur); + + heap = mem_heap_create(500); + + entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap); + + mtr_commit(mtr); + + mtr_start(mtr); + + rw_lock_s_lock(&dict_foreign_key_check_lock); + + foreign = UT_LIST_GET_FIRST(table->referenced_list); + + while (foreign) { + if (foreign->referenced_index == index) { + + err = row_ins_check_foreign_constraint(FALSE, foreign, + table, index, entry, thr); + if (err != DB_SUCCESS) { + rw_lock_s_unlock(&dict_foreign_key_check_lock); + mem_heap_free(heap); + + return(err); + } + } + + foreign = UT_LIST_GET_NEXT(referenced_list, foreign); + } + + rw_lock_s_unlock(&dict_foreign_key_check_lock); + mem_heap_free(heap); + + return(DB_SUCCESS); +} + +/************************************************************************* Creates an update node for a query graph. */ upd_node_t* @@ -484,13 +612,73 @@ upd_ext_vec_contains( } /******************************************************************* +Builds an update vector from those fields which in a secondary index entry +differ from a record that has the equal ordering fields. NOTE: we compare +the fields as binary strings! 
*/ + +upd_t* +row_upd_build_sec_rec_difference_binary( +/*====================================*/ + /* out, own: update vector of differing + fields */ + dict_index_t* index, /* in: index */ + dtuple_t* entry, /* in: entry to insert */ + rec_t* rec, /* in: secondary index record */ + mem_heap_t* heap) /* in: memory heap from which allocated */ +{ + upd_field_t* upd_field; + dfield_t* dfield; + byte* data; + ulint len; + upd_t* update; + ulint n_diff; + ulint i; + + /* This function is used only for a secondary index */ + ut_ad(0 == (index->type & DICT_CLUSTERED)); + + update = upd_create(dtuple_get_n_fields(entry), heap); + + n_diff = 0; + + for (i = 0; i < dtuple_get_n_fields(entry); i++) { + + data = rec_get_nth_field(rec, i, &len); + + dfield = dtuple_get_nth_field(entry, i); + + ut_a(len == dfield_get_len(dfield)); + + /* NOTE: we compare the fields as binary strings! + (No collation) */ + + if (!dfield_data_is_binary_equal(dfield, len, data)) { + + upd_field = upd_get_nth_field(update, n_diff); + + dfield_copy(&(upd_field->new_val), dfield); + + upd_field_set_field_no(upd_field, i, index); + + upd_field->extern_storage = FALSE; + + n_diff++; + } + } + + update->n_fields = n_diff; + + return(update); +} + +/******************************************************************* Builds an update vector from those fields, excluding the roll ptr and trx id fields, which in an index entry differ from a record that has -the equal ordering fields. */ +the equal ordering fields. NOTE: we compare the fields as binary strings! */ upd_t* -row_upd_build_difference( -/*=====================*/ +row_upd_build_difference_binary( +/*============================*/ /* out, own: update vector of differing fields, excluding roll ptr and trx id */ dict_index_t* index, /* in: clustered index */ @@ -527,10 +715,13 @@ row_upd_build_difference( dfield = dtuple_get_nth_field(entry, i); + /* NOTE: we compare the fields as binary strings! 
+ (No collation) */ + if ((rec_get_nth_field_extern_bit(rec, i) != upd_ext_vec_contains(ext_vec, n_ext_vec, i)) || ((i != trx_id_pos) && (i != roll_ptr_pos) - && !dfield_data_is_equal(dfield, len, data))) { + && !dfield_data_is_binary_equal(dfield, len, data))) { upd_field = upd_get_nth_field(update, n_diff); @@ -630,13 +821,16 @@ row_upd_clust_index_replace_new_col_vals( /*************************************************************** Checks if an update vector changes an ordering field of an index record. This function is fast if the update vector is short or the number of ordering -fields in the index is small. Otherwise, this can be quadratic. */ +fields in the index is small. Otherwise, this can be quadratic. +NOTE: we compare the fields as binary strings! */ ibool -row_upd_changes_ord_field( -/*======================*/ +row_upd_changes_ord_field_binary( +/*=============================*/ /* out: TRUE if update vector changes - an ordering field in the index record */ + an ordering field in the index record; + NOTE: the fields are compared as binary + strings */ dtuple_t* row, /* in: old value of row, or NULL if the row and the data values in update are not known when this function is called, e.g., at @@ -671,7 +865,7 @@ row_upd_changes_ord_field( if (col_pos == upd_field->field_no && (row == NULL - || !dfield_datas_are_equal( + || !dfield_datas_are_binary_equal( dtuple_get_nth_field(row, col_no), &(upd_field->new_val)))) { return(TRUE); @@ -683,11 +877,12 @@ row_upd_changes_ord_field( } /*************************************************************** -Checks if an update vector changes an ordering field of an index record. */ +Checks if an update vector changes an ordering field of an index record. +NOTE: we compare the fields as binary strings! 
*/ ibool -row_upd_changes_some_index_ord_field( -/*=================================*/ +row_upd_changes_some_index_ord_field_binary( +/*========================================*/ /* out: TRUE if update vector may change an ordering field in an index record */ dict_table_t* table, /* in: table */ @@ -812,6 +1007,7 @@ row_upd_sec_index_entry( upd_node_t* node, /* in: row update node */ que_thr_t* thr) /* in: query thread */ { + ibool check_ref; ibool found; dict_index_t* index; dtuple_t* entry; @@ -825,6 +1021,8 @@ row_upd_sec_index_entry( index = node->index; + check_ref = row_upd_index_is_referenced(index); + heap = mem_heap_create(1024); /* Build old index entry */ @@ -855,6 +1053,8 @@ row_upd_sec_index_entry( "InnoDB: Make a detailed bug report and send it\n"); fprintf(stderr, "InnoDB: to mysql@lists.mysql.com\n"); + trx_print(thr_get_trx(thr)); + mem_free(err_buf); } else { /* Delete mark the old index record; it can already be @@ -864,9 +1064,21 @@ row_upd_sec_index_entry( if (!rec_get_deleted_flag(rec)) { err = btr_cur_del_mark_set_sec_rec(0, btr_cur, TRUE, thr, &mtr); + if (err == DB_SUCCESS && check_ref) { + /* NOTE that the following call loses + the position of pcur ! 
*/ + err = row_upd_check_references_constraints( + &pcur, index->table, + index, thr, &mtr); + if (err != DB_SUCCESS) { + + goto close_cur; + } + } + } } - +close_cur: btr_pcur_close(&pcur); mtr_commit(&mtr); @@ -907,8 +1119,8 @@ row_upd_sec_step( ut_ad(!(node->index->type & DICT_CLUSTERED)); if (node->state == UPD_NODE_UPDATE_ALL_SEC - || row_upd_changes_ord_field(node->row, node->index, - node->update)) { + || row_upd_changes_ord_field_binary(node->row, node->index, + node->update)) { err = row_upd_sec_index_entry(node, thr); return(err); @@ -931,6 +1143,8 @@ row_upd_clust_rec_by_insert( upd_node_t* node, /* in: row update node */ dict_index_t* index, /* in: clustered index of the record */ que_thr_t* thr, /* in: query thread */ + ibool check_ref,/* in: TRUE if index may be referenced in + a foreign key constraint */ mtr_t* mtr) /* in: mtr; gets committed here */ { mem_heap_t* heap; @@ -958,6 +1172,7 @@ row_upd_clust_rec_by_insert( return(err); } + /* Mark as not-owned the externally stored fields which the new row inherits from the delete marked record: purge should not free those externally stored fields even if the delete marked @@ -965,6 +1180,19 @@ row_upd_clust_rec_by_insert( btr_cur_mark_extern_inherited_fields(btr_cur_get_rec(btr_cur), node->update, mtr); + if (check_ref) { + /* NOTE that the following call loses + the position of pcur ! 
*/ + err = row_upd_check_references_constraints( + pcur, table, + index, thr, mtr); + if (err != DB_SUCCESS) { + mtr_commit(mtr); + + return(err); + } + } + } mtr_commit(mtr); @@ -1095,6 +1323,8 @@ row_upd_del_mark_clust_rec( upd_node_t* node, /* in: row update node */ dict_index_t* index, /* in: clustered index */ que_thr_t* thr, /* in: query thread */ + ibool check_ref,/* in: TRUE if index may be referenced in + a foreign key constraint */ mtr_t* mtr) /* in: mtr; gets committed here */ { btr_pcur_t* pcur; @@ -1120,6 +1350,18 @@ row_upd_del_mark_clust_rec( err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG, btr_cur, TRUE, thr, mtr); + if (err == DB_SUCCESS && check_ref) { + /* NOTE that the following call loses + the position of pcur ! */ + err = row_upd_check_references_constraints(pcur, index->table, + index, thr, mtr); + if (err != DB_SUCCESS) { + mtr_commit(mtr); + + return(err); + } + } + mtr_commit(mtr); return(err); @@ -1140,12 +1382,15 @@ row_upd_clust_step( dict_index_t* index; btr_pcur_t* pcur; ibool success; + ibool check_ref; ulint err; - mtr_t mtr_buf; mtr_t* mtr; + mtr_t mtr_buf; index = dict_table_get_first_index(node->table); + check_ref = row_upd_index_is_referenced(index); + pcur = node->pcur; /* We have to restore the cursor to its position */ @@ -1210,8 +1455,8 @@ row_upd_clust_step( /* NOTE: the following function calls will also commit mtr */ if (node->is_delete) { - err = row_upd_del_mark_clust_rec(node, index, thr, mtr); - + err = row_upd_del_mark_clust_rec(node, index, thr, check_ref, + mtr); if (err != DB_SUCCESS) { return(err); @@ -1244,7 +1489,7 @@ row_upd_clust_step( row_upd_store_row(node); - if (row_upd_changes_ord_field(node->row, index, node->update)) { + if (row_upd_changes_ord_field_binary(node->row, index, node->update)) { /* Update causes an ordering field (ordering fields within the B-tree) of the clustered index record to change: perform @@ -1257,8 +1502,8 @@ row_upd_clust_step( choosing records to update. 
MySQL solves now the problem externally! */ - err = row_upd_clust_rec_by_insert(node, index, thr, mtr); - + err = row_upd_clust_rec_by_insert(node, index, thr, check_ref, + mtr); if (err != DB_SUCCESS) { return(err); @@ -1304,8 +1549,8 @@ row_upd( interpreter: we must calculate it on the fly: */ if (node->is_delete || - row_upd_changes_some_index_ord_field(node->table, - node->update)) { + row_upd_changes_some_index_ord_field_binary( + node->table, node->update)) { node->cmpl_info = 0; } else { node->cmpl_info = UPD_NODE_NO_ORD_CHANGE; diff --git a/innobase/row/row0vers.c b/innobase/row/row0vers.c index 4dc65669247..5b62cd2b7e3 100644 --- a/innobase/row/row0vers.c +++ b/innobase/row/row0vers.c @@ -269,7 +269,13 @@ row_vers_old_has_index_entry( row = row_build(ROW_COPY_POINTERS, clust_index, rec, heap); entry = row_build_index_entry(row, index, heap); - if (dtuple_datas_are_equal(ientry, entry)) { + /* NOTE that we cannot do the comparison as binary + fields because the row is maybe being modified so that + the clustered index record has already been updated + to a different binary value in a char field, but the + collation identifies the old and new value anyway! */ + + if (dtuple_datas_are_ordering_equal(ientry, entry)) { mem_heap_free(heap); @@ -307,7 +313,13 @@ row_vers_old_has_index_entry( prev_version, heap); entry = row_build_index_entry(row, index, heap); - if (dtuple_datas_are_equal(ientry, entry)) { + /* NOTE that we cannot do the comparison as binary + fields because maybe the secondary index record has + already been updated to a different binary value in + a char field, but the collation identifies the old + and new value anyway! 
*/ + + if (dtuple_datas_are_ordering_equal(ientry, entry)) { mem_heap_free(heap); diff --git a/innobase/srv/srv0srv.c b/innobase/srv/srv0srv.c index ba556e1c050..eda96091279 100644 --- a/innobase/srv/srv0srv.c +++ b/innobase/srv/srv0srv.c @@ -30,6 +30,7 @@ Created 10/8/1995 Heikki Tuuri #include "ut0mem.h" #include "os0proc.h" #include "mem0mem.h" +#include "mem0pool.h" #include "sync0sync.h" #include "sync0ipm.h" #include "thr0loc.h" @@ -46,11 +47,14 @@ Created 10/8/1995 Heikki Tuuri #include "ibuf0ibuf.h" #include "buf0flu.h" #include "btr0sea.h" +#include "dict0load.h" /* The following counter is incremented whenever there is some user activity in the server */ ulint srv_activity_count = 0; +char* srv_main_thread_op_info = ""; + /* Server parameters which are read from the initfile */ /* The following three are dir paths which are catenated before file @@ -66,6 +70,11 @@ ulint* srv_data_file_sizes = NULL; /* size in database pages */ ulint* srv_data_file_is_raw_partition = NULL; +/* If the following is TRUE we do not allow inserts etc. This protects +the user from forgetting the 'newraw' keyword to my.cnf */ + +ibool srv_created_new_raw = FALSE; + char** srv_log_group_home_dirs = NULL; ulint srv_n_log_groups = ULINT_MAX; @@ -75,6 +84,9 @@ ibool srv_log_archive_on = TRUE; ulint srv_log_buffer_size = ULINT_MAX; /* size in database pages */ ibool srv_flush_log_at_trx_commit = TRUE; +byte srv_latin1_ordering[256]; /* The sort order table of the latin1 + character set */ + ibool srv_use_native_aio = FALSE; ulint srv_pool_size = ULINT_MAX; /* size in database pages; @@ -93,6 +105,11 @@ ulint srv_lock_wait_timeout = 1024 * 1024 * 1024; char* srv_unix_file_flush_method_str = NULL; ulint srv_unix_file_flush_method = 0; +/* If the following is != 0 we do not allow inserts etc. 
This protects +the user from forgetting innodb_force_recovery keyword to my.cnf */ + +ulint srv_force_recovery = 0; + ibool srv_use_doublewrite_buf = TRUE; ibool srv_set_thread_priorities = TRUE; @@ -115,10 +132,15 @@ ulint srv_n_rows_inserted = 0; ulint srv_n_rows_updated = 0; ulint srv_n_rows_deleted = 0; ulint srv_n_rows_read = 0; +ulint srv_n_rows_inserted_old = 0; +ulint srv_n_rows_updated_old = 0; +ulint srv_n_rows_deleted_old = 0; +ulint srv_n_rows_read_old = 0; ibool srv_print_innodb_monitor = FALSE; ibool srv_print_innodb_lock_monitor = FALSE; ibool srv_print_innodb_tablespace_monitor = FALSE; +ibool srv_print_innodb_table_monitor = FALSE; /* The parameters below are obsolete: */ @@ -1739,31 +1761,153 @@ srv_release_mysql_thread_if_suspended( } /************************************************************************* -A thread which wakes up threads whose lock wait may have lasted too long. */ +A thread which wakes up threads whose lock wait may have lasted too long. +This also prints the info output by various InnoDB monitors. */ #ifndef __WIN__ void* #else ulint #endif -srv_lock_timeout_monitor_thread( -/*============================*/ +srv_lock_timeout_and_monitor_thread( +/*================================*/ /* out: a dummy parameter */ void* arg) /* in: a dummy parameter required by os_thread_create */ { + double time_elapsed; + time_t current_time; + time_t last_monitor_time; ibool some_waits; srv_slot_t* slot; double wait_time; ulint i; UT_NOT_USED(arg); + last_monitor_time = time(NULL); loop: /* When someone is waiting for a lock, we wake up every second and check if a timeout has passed for a lock wait */ - os_thread_sleep(1000000); - + os_thread_sleep(1000000); + + /* In case mutex_exit is not a memory barrier, it is + theoretically possible some threads are left waiting though + the semaphore is already released. 
Wake up those threads: */ + + sync_arr_wake_threads_if_sema_free(); + + current_time = time(NULL); + + time_elapsed = difftime(current_time, last_monitor_time); + + if (time_elapsed > 15) { + + last_monitor_time = time(NULL); + + if (srv_print_innodb_monitor) { + + printf("=====================================\n"); + ut_print_timestamp(stdout); + + printf(" INNODB MONITOR OUTPUT\n" + "=====================================\n"); + printf("----------\n" + "SEMAPHORES\n" + "----------\n"); + sync_print(); + printf("------------\n" + "TRANSACTIONS\n" + "------------\n"); + lock_print_info(); + printf("--------\n" + "FILE I/O\n" + "--------\n"); + os_aio_print(); + printf("-------------\n" + "INSERT BUFFER\n" + "-------------\n"); + ibuf_print(); + printf("---\n" + "LOG\n" + "---\n"); + log_print(); + printf("----------------------\n" + "BUFFER POOL AND MEMORY\n" + "----------------------\n"); + printf( + "Total memory allocated %lu; in additional pool allocated %lu\n", + ut_total_allocated_memory, + mem_pool_get_reserved(mem_comm_pool)); + buf_print_io(); + printf("--------------\n" + "ROW OPERATIONS\n" + "--------------\n"); + printf("InnoDB main thread state: %s\n", + srv_main_thread_op_info); + printf( + "Number of rows inserted %lu, updated %lu, deleted %lu, read %lu\n", + srv_n_rows_inserted, + srv_n_rows_updated, + srv_n_rows_deleted, + srv_n_rows_read); + printf( + "%.2f inserts/s, %.2f updates/s, %.2f deletes/s, %.2f reads/s\n", + (srv_n_rows_inserted - srv_n_rows_inserted_old) + / time_elapsed, + (srv_n_rows_updated - srv_n_rows_updated_old) + / time_elapsed, + (srv_n_rows_deleted - srv_n_rows_deleted_old) + / time_elapsed, + (srv_n_rows_read - srv_n_rows_read_old) + / time_elapsed); + + srv_n_rows_inserted_old = srv_n_rows_inserted; + srv_n_rows_updated_old = srv_n_rows_updated; + srv_n_rows_deleted_old = srv_n_rows_deleted; + srv_n_rows_read_old = srv_n_rows_read; + + printf("----------------------------\n" + "END OF INNODB MONITOR OUTPUT\n" + 
"============================\n"); + + + } + + if (srv_print_innodb_tablespace_monitor) { + + printf("================================================\n"); + + ut_print_timestamp(stdout); + + printf(" INNODB TABLESPACE MONITOR OUTPUT\n" + "================================================\n"); + + fsp_print(0); + fprintf(stderr, "Validating tablespace\n"); + fsp_validate(0); + fprintf(stderr, "Validation ok\n"); + printf("---------------------------------------\n" + "END OF INNODB TABLESPACE MONITOR OUTPUT\n" + "=======================================\n"); + } + + if (srv_print_innodb_table_monitor) { + + printf("===========================================\n"); + + ut_print_timestamp(stdout); + + printf(" INNODB TABLE MONITOR OUTPUT\n" + "===========================================\n"); + dict_print(); + + printf("-----------------------------------\n" + "END OF INNODB TABLE MONITOR OUTPUT\n" + "==================================\n"); + } + } + mutex_enter(&kernel_mutex); some_waits = FALSE; @@ -1786,11 +1930,10 @@ loop: /* Timeout exceeded or a wrap over in system time counter: cancel the lock request queued - by the transaction; NOTE that currently only - a record lock request can be waiting in - MySQL! 
*/ + by the transaction and release possible + other transactions waiting behind */ - lock_rec_cancel( + lock_cancel_waiting_and_release( thr_get_trx(slot->thr)->wait_lock); } } @@ -1800,11 +1943,15 @@ loop: mutex_exit(&kernel_mutex); - if (some_waits) { + if (some_waits || srv_print_innodb_monitor + || srv_print_innodb_lock_monitor + || srv_print_innodb_tablespace_monitor + || srv_print_innodb_table_monitor) { goto loop; } - /* No one was waiting for a lock: suspend this thread */ + /* No one was waiting for a lock and no monitor was active: + suspend this thread */ os_event_wait(srv_lock_timeout_thread_event); @@ -1817,6 +1964,36 @@ loop: #endif } +/************************************************************************* +A thread which prints warnings about semaphore waits which have lasted +too long. These can be used to track bugs which cause hangs. */ + +#ifndef __WIN__ +void* +#else +ulint +#endif +srv_error_monitor_thread( +/*=====================*/ + /* out: a dummy parameter */ + void* arg) /* in: a dummy parameter required by + os_thread_create */ +{ + UT_NOT_USED(arg); +loop: + os_thread_sleep(10000000); + + sync_array_print_long_waits(); + + goto loop; + +#ifndef __WIN__ + return(NULL); +#else + return(0); +#endif +} + /*********************************************************************** Tells the InnoDB server that there has been activity in the database and wakes up the master thread if it is suspended (not sleeping). 
Used @@ -1855,15 +2032,18 @@ srv_master_thread( os_thread_create */ { os_event_t event; + time_t last_flush_time; + time_t current_time; ulint old_activity_count; ulint n_pages_purged; ulint n_bytes_merged; ulint n_pages_flushed; ulint n_bytes_archived; + ulint n_ios; + ulint n_ios_old; + ulint n_ios_very_old; + ulint n_pend_ios; ulint i; - time_t last_flush_time; - time_t current_time; - time_t last_monitor_time; UT_NOT_USED(arg); @@ -1876,26 +2056,56 @@ srv_master_thread( mutex_exit(&kernel_mutex); os_event_set(srv_sys->operational); - - last_monitor_time = time(NULL); loop: + srv_main_thread_op_info = "reserving kernel mutex"; + + n_ios_very_old = log_sys->n_log_ios + buf_pool->n_pages_read + + buf_pool->n_pages_written; mutex_enter(&kernel_mutex); old_activity_count = srv_activity_count; mutex_exit(&kernel_mutex); - /* We run purge every 10 seconds, even if the server were active: */ + /* We run purge and a batch of ibuf_contract every 10 seconds, even + if the server were active: */ for (i = 0; i < 10; i++) { + n_ios_old = log_sys->n_log_ios + buf_pool->n_pages_read + + buf_pool->n_pages_written; + + srv_main_thread_op_info = "sleeping"; os_thread_sleep(1000000); + if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) { + + goto loop; + } + /* We flush the log once in a second even if no commit is issued or the we have specified in my.cnf no flush at transaction commit */ + srv_main_thread_op_info = "flushing log"; log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP); + /* If there were less than 10 i/os during the + one second sleep, we assume that there is free + disk i/o capacity available, and it makes sense to + do an insert buffer merge. 
*/ + + n_pend_ios = buf_get_n_pending_ios() + + log_sys->n_pending_writes; + n_ios = log_sys->n_log_ios + buf_pool->n_pages_read + + buf_pool->n_pages_written; + if (n_pend_ios < 3 && (n_ios - n_ios_old < 10)) { + srv_main_thread_op_info = "doing insert buffer merge"; + ibuf_contract_for_n_pages(TRUE, 5); + + srv_main_thread_op_info = "flushing log"; + log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP); + } + if (srv_activity_count == old_activity_count) { if (srv_print_thread_releases) { @@ -1910,28 +2120,48 @@ loop: printf("Master thread wakes up!\n"); } + /* If there were less than 200 i/os during the 10 second period, + we assume that there is free disk i/o capacity available, and it + makes sense to do a buffer pool flush. */ + + n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes; + n_ios = log_sys->n_log_ios + buf_pool->n_pages_read + + buf_pool->n_pages_written; + if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) { + + srv_main_thread_op_info = "flushing buffer pool pages"; + buf_flush_batch(BUF_FLUSH_LIST, 50, ut_dulint_max); + + srv_main_thread_op_info = "flushing log"; + log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP); + } + + /* We run a batch of insert buffer merge every 10 seconds, + even if the server were active */ + + srv_main_thread_op_info = "doing insert buffer merge"; + ibuf_contract_for_n_pages(TRUE, 5); + + srv_main_thread_op_info = "flushing log"; + log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP); + + /* We run a full purge every 10 seconds, even if the server + were active */ + n_pages_purged = 1; last_flush_time = time(NULL); while (n_pages_purged) { - /* TODO: replace this by a check if we are running - out of file space! 
*/ - if (srv_print_innodb_monitor) { - ut_print_timestamp(stdout); - printf(" InnoDB starts purge\n"); - } - - n_pages_purged = trx_purge(); - if (srv_print_innodb_monitor) { - ut_print_timestamp(stdout); - printf(" InnoDB purged %lu pages\n", n_pages_purged); - } + srv_main_thread_op_info = "purging"; + n_pages_purged = trx_purge(); current_time = time(NULL); if (difftime(current_time, last_flush_time) > 1) { + srv_main_thread_op_info = "flushing log"; + log_flush_up_to(ut_dulint_max, LOG_WAIT_ONE_GROUP); last_flush_time = current_time; } @@ -1941,67 +2171,7 @@ background_loop: /* In this loop we run background operations when the server is quiet */ - current_time = time(NULL); - - if (difftime(current_time, last_monitor_time) > 15) { - - last_monitor_time = time(NULL); - - if (srv_print_innodb_monitor) { - - printf("=====================================\n"); - ut_print_timestamp(stdout); - - printf(" INNODB MONITOR OUTPUT\n" - "=====================================\n"); - printf("------------\n" - "TRANSACTIONS\n" - "------------\n"); - lock_print_info(); - printf("-----------------------------------------------\n" - "CURRENT SEMAPHORES RESERVED AND SEMAPHORE WAITS\n" - "-----------------------------------------------\n"); - sync_print(); - printf("CURRENT PENDING FILE I/O'S\n" - "--------------------------\n"); - os_aio_print(); - printf("-----------\n" - "BUFFER POOL\n" - "-----------\n"); - buf_print_io(); - printf("--------------\n" - "ROW OPERATIONS\n" - "--------------\n"); - printf( - "Number of rows inserted %lu, updated %lu, deleted %lu, read %lu\n", - srv_n_rows_inserted, - srv_n_rows_updated, - srv_n_rows_deleted, - srv_n_rows_read); - printf("Server activity counter %lu\n", srv_activity_count); - printf("----------------------------\n" - "END OF INNODB MONITOR OUTPUT\n" - "============================\n"); - } - - if (srv_print_innodb_tablespace_monitor) { - - printf("================================================\n"); - - 
ut_print_timestamp(stdout); - - printf(" INNODB TABLESPACE MONITOR OUTPUT\n" - "================================================\n"); - - fsp_print(0); - fprintf(stderr, "Validating tablespace\n"); - fsp_validate(0); - fprintf(stderr, "Validation ok\n"); - printf("---------------------------------------\n" - "END OF INNODB TABLESPACE MONITOR OUTPUT\n" - "=======================================\n"); - } - } + srv_main_thread_op_info = "reserving kernel mutex"; mutex_enter(&kernel_mutex); if (srv_activity_count != old_activity_count) { @@ -2014,17 +2184,11 @@ background_loop: /* The server has been quiet for a while: start running background operations */ - if (srv_print_innodb_monitor) { - ut_print_timestamp(stdout); - printf(" InnoDB starts purge\n"); - } + srv_main_thread_op_info = "purging"; n_pages_purged = trx_purge(); - if (srv_print_innodb_monitor) { - ut_print_timestamp(stdout); - printf(" InnoDB purged %lu pages\n", n_pages_purged); - } + srv_main_thread_op_info = "reserving kernel mutex"; mutex_enter(&kernel_mutex); if (srv_activity_count != old_activity_count) { @@ -2033,17 +2197,10 @@ background_loop: } mutex_exit(&kernel_mutex); - if (srv_print_innodb_monitor) { - ut_print_timestamp(stdout); - printf(" InnoDB starts insert buffer merge\n"); - } + srv_main_thread_op_info = "doing insert buffer merge"; + n_bytes_merged = ibuf_contract_for_n_pages(TRUE, 20); - n_bytes_merged = ibuf_contract(TRUE); - - if (srv_print_innodb_monitor) { - ut_print_timestamp(stdout); - printf(" InnoDB merged %lu bytes\n", n_bytes_merged); - } + srv_main_thread_op_info = "reserving kernel mutex"; mutex_enter(&kernel_mutex); if (srv_activity_count != old_activity_count) { @@ -2052,17 +2209,10 @@ background_loop: } mutex_exit(&kernel_mutex); - if (srv_print_innodb_monitor) { - ut_print_timestamp(stdout); - printf(" InnoDB (main thread) starts buffer pool flush\n"); - } - + srv_main_thread_op_info = "flushing buffer pool pages"; n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 
100, ut_dulint_max); - if (srv_print_innodb_monitor) { - ut_print_timestamp(stdout); - printf(" InnoDB flushed %lu pages\n", n_pages_flushed); - } + srv_main_thread_op_info = "reserving kernel mutex"; mutex_enter(&kernel_mutex); if (srv_activity_count != old_activity_count) { @@ -2071,16 +2221,23 @@ background_loop: } mutex_exit(&kernel_mutex); + srv_main_thread_op_info = "waiting for buffer pool flush to end"; buf_flush_wait_batch_end(BUF_FLUSH_LIST); + srv_main_thread_op_info = "making checkpoint"; + log_checkpoint(TRUE, FALSE); + srv_main_thread_op_info = "reserving kernel mutex"; + mutex_enter(&kernel_mutex); if (srv_activity_count != old_activity_count) { mutex_exit(&kernel_mutex); goto loop; } mutex_exit(&kernel_mutex); + + srv_main_thread_op_info = "archiving log (if log archive is on)"; log_archive_do(FALSE, &n_bytes_archived); @@ -2098,12 +2255,16 @@ background_loop: /* There is no work for background operations either: suspend master thread to wait for more server activity */ + srv_main_thread_op_info = "suspending"; + mutex_enter(&kernel_mutex); event = srv_suspend_thread(); mutex_exit(&kernel_mutex); + srv_main_thread_op_info = "waiting for server activity"; + os_event_wait(event); goto loop; diff --git a/innobase/srv/srv0start.c b/innobase/srv/srv0start.c index 15d99ab3001..b9e7050d546 100644 --- a/innobase/srv/srv0start.c +++ b/innobase/srv/srv0start.c @@ -73,7 +73,10 @@ os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 5]; #define SRV_N_PENDING_IOS_PER_THREAD OS_AIO_N_PENDING_IOS_PER_THREAD #define SRV_MAX_N_PENDING_SYNC_IOS 100 -#define SRV_MAX_N_OPEN_FILES 25 +/* The following limit may be too big in some old operating systems: +we may get an assertion failure in os0file.c */ + +#define SRV_MAX_N_OPEN_FILES 500 #define SRV_LOG_SPACE_FIRST_ID 1000000000 @@ -315,7 +318,12 @@ open_or_create_data_files( ulint size_high; char name[10000]; - ut_a(srv_n_data_files < 1000); + if (srv_n_data_files >= 1000) { + fprintf(stderr, "InnoDB: can only have < 1000 
data files\n" + "InnoDB: you have defined %lu\n", + srv_n_data_files); + return(DB_ERROR); + } *sum_of_new_sizes = 0; @@ -336,6 +344,8 @@ open_or_create_data_files( /* The partition is opened, not created; then it is written over */ + srv_created_new_raw = TRUE; + files[i] = os_file_create( name, OS_FILE_OPEN, OS_FILE_NORMAL, OS_DATA_FILE, &ret); @@ -375,6 +385,7 @@ open_or_create_data_files( if (!ret) { fprintf(stderr, "InnoDB: Error in opening %s\n", name); + os_file_get_last_error(); return(DB_ERROR); } @@ -537,9 +548,6 @@ innobase_start_or_create_for_mysql(void) /*====================================*/ /* out: DB_SUCCESS or error code */ { - ulint i; - ulint k; - ulint err; ibool create_new_db; ibool log_file_created; ibool log_created = FALSE; @@ -550,6 +558,9 @@ innobase_start_or_create_for_mysql(void) ulint max_arch_log_no; ibool start_archive; ulint sum_of_new_sizes; + ulint err; + ulint i; + ulint k; mtr_t mtr; log_do_write = TRUE; @@ -866,17 +877,19 @@ innobase_start_or_create_for_mysql(void) SRV_MAX_N_IO_THREADS); */ } - /* Create the master thread which monitors the database - server, and does purge and other utility operations */ - - os_thread_create(&srv_master_thread, NULL, thread_ids + 1 + - SRV_MAX_N_IO_THREADS); /* fprintf(stderr, "Max allowed record size %lu\n", page_get_free_space_of_empty() / 2); */ - /* Create the thread which watches the timeouts for lock waits */ - os_thread_create(&srv_lock_timeout_monitor_thread, NULL, + /* Create the thread which watches the timeouts for lock waits + and prints InnoDB monitor info */ + + os_thread_create(&srv_lock_timeout_and_monitor_thread, NULL, thread_ids + 2 + SRV_MAX_N_IO_THREADS); + + /* Create the thread which warns of long semaphore waits */ + os_thread_create(&srv_error_monitor_thread, NULL, + thread_ids + 3 + SRV_MAX_N_IO_THREADS); + srv_was_started = TRUE; srv_is_being_started = FALSE; @@ -886,6 +899,17 @@ innobase_start_or_create_for_mysql(void) trx_sys_create_doublewrite_buf(); } + err = 
dict_create_or_check_foreign_constraint_tables(); + + if (err != DB_SUCCESS) { + return((int)DB_ERROR); + } + + /* Create the master thread which monitors the database + server, and does purge and other utility operations */ + + os_thread_create(&srv_master_thread, NULL, thread_ids + 1 + + SRV_MAX_N_IO_THREADS); /* buf_debug_prints = TRUE; */ ut_print_timestamp(stderr); @@ -903,12 +927,16 @@ innobase_shutdown_for_mysql(void) /* out: DB_SUCCESS or error code */ { if (!srv_was_started) { - if (srv_is_being_started) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: shutting down a not properly started database\n"); - } - return(DB_SUCCESS); + if (srv_is_being_started) { + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Warning: shutting down a not properly started\n"); + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: or created database!\n"); + } + + return(DB_SUCCESS); } /* Flush buffer pool to disk, write the current lsn to @@ -917,6 +945,6 @@ innobase_shutdown_for_mysql(void) logs_empty_and_mark_files_at_shutdown(); ut_free_all_mem(); - + return((int) DB_SUCCESS); } diff --git a/innobase/sync/sync0arr.c b/innobase/sync/sync0arr.c index 4183f3f1c4c..e4c351b9d21 100644 --- a/innobase/sync/sync0arr.c +++ b/innobase/sync/sync0arr.c @@ -14,6 +14,7 @@ Created 9/5/1995 Heikki Tuuri #include "sync0sync.h" #include "sync0rw.h" #include "os0sync.h" +#include "srv0srv.h" /* WAIT ARRAY @@ -64,6 +65,8 @@ struct sync_cell_struct { ibool event_set; /* TRUE if the event is set */ os_event_t event; /* operating system event semaphore handle */ + time_t reservation_time;/* time when the thread reserved + the wait cell */ }; /* NOTE: It is allowed for a thread to wait @@ -321,16 +324,12 @@ sync_array_reserve_cell( sync_array_t* arr, /* in: wait array */ void* object, /* in: pointer to the object to wait for */ ulint type, /* in: lock request type */ - #ifdef UNIV_SYNC_DEBUG - char* file, /* in: in debug version file where - requested */ - 
ulint line, /* in: in the debug version line where - requested */ - #endif + char* file, /* in: file where requested */ + ulint line, /* in: line where requested */ ulint* index) /* out: index of the reserved cell */ { - ulint i; sync_cell_t* cell; + ulint i; ut_a(object); ut_a(index); @@ -350,18 +349,15 @@ sync_array_reserve_cell( sync_cell_event_reset(cell); } + cell->reservation_time = time(NULL); + cell->thread = os_thread_get_curr_id(); + cell->wait_object = object; cell->request_type = type; - cell->thread = os_thread_get_curr_id(); cell->waiting = FALSE; - #ifdef UNIV_SYNC_DEBUG cell->file = file; cell->line = line; - #else - cell->file = "NOT KNOWN"; - cell->line = 0; - #endif arr->n_reserved++; @@ -436,6 +432,7 @@ static void sync_array_cell_print( /*==================*/ + FILE* file, /* in: file where to print */ sync_cell_t* cell) /* in: sync cell */ { mutex_t* mutex; @@ -445,53 +442,63 @@ sync_array_cell_print( type = cell->request_type; + fprintf(file, +"--Thread %lu has waited at %s line %lu for %.2f seconds the semaphore:\n", + (ulint)cell->thread, cell->file, cell->line, + difftime(time(NULL), cell->reservation_time)); + if (type == SYNC_MUTEX) { - str = "MUTEX ENTER"; mutex = (mutex_t*)cell->wait_object; - printf("Mutex created in file %s line %lu", - mutex->cfile_name, mutex->cline); + fprintf(file, + "Mutex at %lx created file %s line %lu, lock var %lu\n", + (ulint)mutex, mutex->cfile_name, mutex->cline, + mutex->lock_word); + fprintf(file, + "Last time reserved in file %s line %lu, waiters flag %lu\n", + mutex->file_name, mutex->line, mutex->waiters); + } else if (type == RW_LOCK_EX || type == RW_LOCK_SHARED) { if (type == RW_LOCK_EX) { - str = "X-LOCK"; + fprintf(file, "X-lock on"); } else { - str = "S_LOCK"; + fprintf(file, "S-lock on"); } rwlock = (rw_lock_t*)cell->wait_object; - printf("Rw-latch created in file %s line %lu", - rwlock->cfile_name, rwlock->cline); + fprintf(file, " RW-latch at %lx created in file %s line %lu\n", + 
(ulint)rwlock, rwlock->cfile_name, rwlock->cline); if (rwlock->writer != RW_LOCK_NOT_LOCKED) { - printf(" writer reserved with %lu", rwlock->writer); + fprintf(file, + "a writer (thread id %lu) has reserved it in mode", + (ulint)rwlock->writer_thread); + if (rwlock->writer == RW_LOCK_EX) { + fprintf(file, " exclusive\n"); + } else { + fprintf(file, " wait exclusive\n"); + } } - if (rwlock->writer == RW_LOCK_EX) { - printf(" reserv. thread id %lu", - (ulint)rwlock->writer_thread); - } - - if (rwlock->reader_count > 0) { - printf(" readers %lu", rwlock->reader_count); - } + fprintf(file, "number of readers %lu, waiters flag %lu\n", + rwlock->reader_count, rwlock->waiters); + + fprintf(file, "Last time read locked in file %s line %lu\n", + rwlock->last_s_file_name, rwlock->last_s_line); + fprintf(file, "Last time write locked in file %s line %lu\n", + rwlock->last_x_file_name, rwlock->last_x_line); } else { ut_error; } - printf(" at addr %lx waited for by thread %lu op. %s file %s line %lu ", - (ulint)cell->wait_object, - (ulint)cell->thread, - str, cell->file, cell->line); if (!cell->waiting) { - printf("WAIT ENDED "); + fprintf(file, "wait has ended\n"); } if (cell->event_set) { - printf("EVENT SET"); + fprintf(file, "wait is ending\n"); } - - printf("\n"); } /********************************************************************** @@ -620,14 +627,15 @@ sync_array_detect_deadlock( released the mutex: in this case no deadlock can occur, as the wait array cannot contain a thread with ID_UNDEFINED value. 
*/ + ret = sync_array_deadlock_step(arr, start, thread, 0, - depth); + depth); if (ret) { printf( "Mutex %lx owned by thread %lu file %s line %lu\n", (ulint)mutex, mutex->thread_id, mutex->file_name, mutex->line); - sync_array_cell_print(cell); + sync_array_cell_print(stdout, cell); return(TRUE); } } @@ -636,11 +644,11 @@ sync_array_detect_deadlock( } else if (cell->request_type == RW_LOCK_EX) { - lock = cell->wait_object; + lock = cell->wait_object; - debug = UT_LIST_GET_FIRST(lock->debug_list); + debug = UT_LIST_GET_FIRST(lock->debug_list); - while (debug != NULL) { + while (debug != NULL) { thread = debug->thread_id; @@ -661,23 +669,23 @@ sync_array_detect_deadlock( if (ret) { printf("rw-lock %lx ", (ulint) lock); rw_lock_debug_print(debug); - sync_array_cell_print(cell); + sync_array_cell_print(stdout, cell); return(TRUE); } } debug = UT_LIST_GET_NEXT(list, debug); - } + } - return(FALSE); + return(FALSE); } else if (cell->request_type == RW_LOCK_SHARED) { - lock = cell->wait_object; - debug = UT_LIST_GET_FIRST(lock->debug_list); + lock = cell->wait_object; + debug = UT_LIST_GET_FIRST(lock->debug_list); - while (debug != NULL) { + while (debug != NULL) { thread = debug->thread_id; @@ -694,16 +702,16 @@ sync_array_detect_deadlock( if (ret) { printf("rw-lock %lx ", (ulint) lock); rw_lock_debug_print(debug); - sync_array_cell_print(cell); + sync_array_cell_print(stdout, cell); return(TRUE); } } debug = UT_LIST_GET_NEXT(list, debug); - } + } - return(FALSE); + return(FALSE); } else { ut_error; @@ -714,6 +722,55 @@ sync_array_detect_deadlock( } /********************************************************************** +Determines if we can wake up the thread waiting for a sempahore. 
*/ +static +ibool +sync_arr_cell_can_wake_up( +/*======================*/ + sync_cell_t* cell) /* in: cell to search */ +{ + mutex_t* mutex; + rw_lock_t* lock; + + if (cell->request_type == SYNC_MUTEX) { + + mutex = cell->wait_object; + + if (mutex_get_lock_word(mutex) == 0) { + + return(TRUE); + } + + } else if (cell->request_type == RW_LOCK_EX) { + + lock = cell->wait_object; + + if (rw_lock_get_reader_count(lock) == 0 + && rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) { + + return(TRUE); + } + + if (rw_lock_get_reader_count(lock) == 0 + && rw_lock_get_writer(lock) == RW_LOCK_WAIT_EX + && lock->writer_thread == cell->thread) { + + return(TRUE); + } + + } else if (cell->request_type == RW_LOCK_SHARED) { + lock = cell->wait_object; + + if (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) { + + return(TRUE); + } + } + + return(FALSE); +} + +/********************************************************************** Frees the cell. NOTE! sync_array_wait_event frees the cell automatically! */ @@ -740,9 +797,8 @@ sync_array_free_cell( } /************************************************************************** -Looks for the cells in the wait array which refer -to the wait object specified, -and sets their corresponding events to the signaled state. In this +Looks for the cells in the wait array which refer to the wait object +specified, and sets their corresponding events to the signaled state. In this way releases the threads waiting for the object to contend for the object. It is possible that no such cell is found, in which case does nothing. */ @@ -783,6 +839,88 @@ sync_array_signal_object( } /************************************************************************** +If the wakeup algorithm does not work perfectly at semaphore relases, +this function will do the waking (see the comment in mutex_exit). This +function should be called about every 1 second in the server. 
*/ + +void +sync_arr_wake_threads_if_sema_free(void) +/*====================================*/ +{ + sync_array_t* arr = sync_primary_wait_array; + sync_cell_t* cell; + ulint count; + ulint i; + + sync_array_enter(arr); + + i = 0; + count = 0; + + while (count < arr->n_reserved) { + + cell = sync_array_get_nth_cell(arr, i); + + if (cell->wait_object != NULL) { + + count++; + + if (sync_arr_cell_can_wake_up(cell)) { + + sync_cell_event_set(cell); + } + } + + i++; + } + + sync_array_exit(arr); +} + +/************************************************************************** +Prints warnings of long semaphore waits to stderr. Currently > 120 sec. */ + +void +sync_array_print_long_waits(void) +/*=============================*/ +{ + sync_cell_t* cell; + ibool old_val; + ibool noticed = FALSE; + ulint i; + + for (i = 0; i < sync_primary_wait_array->n_cells; i++) { + + cell = sync_array_get_nth_cell(sync_primary_wait_array, i); + + if (cell->wait_object != NULL + && difftime(time(NULL), cell->reservation_time) > 120) { + + fprintf(stderr, + "InnoDB: Warning: a long semaphore wait:\n"); + sync_array_cell_print(stderr, cell); + + noticed = TRUE; + } + } + + if (noticed) { + fprintf(stderr, +"InnoDB: ###### Starts InnoDB Monitor for 30 secs to print diagnostic info:\n"); + old_val = srv_print_innodb_monitor; + + srv_print_innodb_monitor = TRUE; + os_event_set(srv_lock_timeout_thread_event); + + os_thread_sleep(30000000); + + srv_print_innodb_monitor = old_val; + fprintf(stderr, +"InnoDB: ###### Diagnostic info printed to the standard output\n"); + } +} + +/************************************************************************** Prints info of the wait array. 
*/ static void @@ -795,9 +933,8 @@ sync_array_output_info( ulint count; ulint i; - printf("-----------------------------------------------------\n"); - printf("SYNC ARRAY INFO: reservation count %ld, signal count %ld\n", - arr->res_count, arr->sg_count); + printf("OS WAIT ARRAY INFO: reservation count %ld, signal count %ld\n", + arr->res_count, arr->sg_count); i = 0; count = 0; @@ -807,7 +944,7 @@ sync_array_output_info( if (cell->wait_object != NULL) { count++; - sync_array_cell_print(cell); + sync_array_cell_print(stdout, cell); } i++; diff --git a/innobase/sync/sync0rw.c b/innobase/sync/sync0rw.c index dc49ce2197e..1ef2920618f 100644 --- a/innobase/sync/sync0rw.c +++ b/innobase/sync/sync0rw.c @@ -17,11 +17,13 @@ Created 9/11/1995 Heikki Tuuri ulint rw_s_system_call_count = 0; ulint rw_s_spin_wait_count = 0; +ulint rw_s_os_wait_count = 0; ulint rw_s_exit_count = 0; ulint rw_x_system_call_count = 0; ulint rw_x_spin_wait_count = 0; +ulint rw_x_os_wait_count = 0; ulint rw_x_exit_count = 0; @@ -95,8 +97,7 @@ rw_lock_create_func( mutex_create(rw_lock_get_mutex(lock)); mutex_set_level(rw_lock_get_mutex(lock), SYNC_NO_ORDER_CHECK); - ut_memcpy(&(lock->mutex.cfile_name), cfile_name, - ut_min(RW_CNAME_LEN - 1, ut_strlen(cfile_name))); + lock->mutex.cfile_name = cfile_name; lock->mutex.cline = cline; rw_lock_set_waiters(lock, 0); @@ -111,11 +112,14 @@ rw_lock_create_func( lock->magic_n = RW_LOCK_MAGIC_N; lock->level = SYNC_LEVEL_NONE; - ut_memcpy(&(lock->cfile_name), cfile_name, - ut_min(RW_CNAME_LEN - 1, ut_strlen(cfile_name))); - lock->cfile_name[RW_CNAME_LEN - 1] = '\0'; + lock->cfile_name = cfile_name; lock->cline = cline; + lock->last_s_file_name = "not yet reserved"; + lock->last_x_file_name = "not yet reserved"; + lock->last_s_line = 0; + lock->last_x_line = 0; + mutex_enter(&rw_lock_list_mutex); UT_LIST_ADD_FIRST(list, rw_lock_list, lock); @@ -186,14 +190,11 @@ for the lock, before suspending the thread. 
*/ void rw_lock_s_lock_spin( /*================*/ - rw_lock_t* lock /* in: pointer to rw-lock */ - #ifdef UNIV_SYNC_DEBUG - ,ulint pass, /* in: pass value; != 0, if the lock + rw_lock_t* lock, /* in: pointer to rw-lock */ + ulint pass, /* in: pass value; != 0, if the lock will be passed to another thread to unlock */ char* file_name, /* in: file name where lock requested */ - ulint line /* in: line where requested */ - #endif -) + ulint line) /* in: line where requested */ { ulint index; /* index of the reserved wait cell */ ulint i; /* spin round count */ @@ -203,7 +204,7 @@ rw_lock_s_lock_spin( lock_loop: rw_s_spin_wait_count++; - /* Spin waiting for the writer field to become free */ + /* Spin waiting for the writer field to become free */ i = 0; while (rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED @@ -223,19 +224,14 @@ lock_loop: printf( "Thread %lu spin wait rw-s-lock at %lx cfile %s cline %lu rnds %lu\n", os_thread_get_curr_id(), (ulint)lock, - &(lock->cfile_name), lock->cline, i); + lock->cfile_name, lock->cline, i); } mutex_enter(rw_lock_get_mutex(lock)); /* We try once again to obtain the lock */ - if (TRUE == rw_lock_s_lock_low(lock - #ifdef UNIV_SYNC_DEBUG - , pass, file_name, - line - #endif - )) { + if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) { mutex_exit(rw_lock_get_mutex(lock)); return; /* Success */ @@ -247,9 +243,7 @@ lock_loop: sync_array_reserve_cell(sync_primary_wait_array, lock, RW_LOCK_SHARED, - #ifdef UNIV_SYNC_DEBUG file_name, line, - #endif &index); rw_lock_set_waiters(lock, 1); @@ -260,12 +254,13 @@ lock_loop: printf( "Thread %lu OS wait rw-s-lock at %lx cfile %s cline %lu\n", os_thread_get_curr_id(), (ulint)lock, - &(lock->cfile_name), lock->cline); + lock->cfile_name, lock->cline); } rw_s_system_call_count++; + rw_s_os_wait_count++; - sync_array_wait_event(sync_primary_wait_array, index); + sync_array_wait_event(sync_primary_wait_array, index); goto lock_loop; } @@ -307,13 +302,10 @@ rw_lock_x_lock_low( not succeed, 
RW_LOCK_EX if success, RW_LOCK_WAIT_EX, if got wait reservation */ rw_lock_t* lock, /* in: pointer to rw-lock */ - ulint pass /* in: pass value; != 0, if the lock will + ulint pass, /* in: pass value; != 0, if the lock will be passed to another thread to unlock */ - #ifdef UNIV_SYNC_DEBUG - ,char* file_name, /* in: file name where lock requested */ - ulint line /* in: line where requested */ - #endif -) + char* file_name,/* in: file name where lock requested */ + ulint line) /* in: line where requested */ { ut_ad(mutex_own(rw_lock_get_mutex(lock))); @@ -330,6 +322,8 @@ rw_lock_x_lock_low( rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, file_name, line); #endif + lock->last_x_file_name = file_name; + lock->last_x_line = line; /* Locking succeeded, we may return */ return(RW_LOCK_EX); @@ -364,6 +358,9 @@ rw_lock_x_lock_low( file_name, line); #endif + lock->last_x_file_name = file_name; + lock->last_x_line = line; + /* Locking succeeded, we may return */ return(RW_LOCK_EX); } @@ -382,6 +379,9 @@ rw_lock_x_lock_low( line); #endif + lock->last_x_file_name = file_name; + lock->last_x_line = line; + /* Locking succeeded, we may return */ return(RW_LOCK_EX); } @@ -404,13 +404,10 @@ void rw_lock_x_lock_func( /*================*/ rw_lock_t* lock, /* in: pointer to rw-lock */ - ulint pass /* in: pass value; != 0, if the lock will + ulint pass, /* in: pass value; != 0, if the lock will be passed to another thread to unlock */ - #ifdef UNIV_SYNC_DEBUG - ,char* file_name, /* in: file name where lock requested */ - ulint line /* in: line where requested */ - #endif -) + char* file_name,/* in: file name where lock requested */ + ulint line) /* in: line where requested */ { ulint index; /* index of the reserved wait cell */ ulint state; /* lock state acquired */ @@ -422,11 +419,7 @@ lock_loop: /* Acquire the mutex protecting the rw-lock fields */ mutex_enter_fast(&(lock->mutex)); - state = rw_lock_x_lock_low(lock, pass - #ifdef UNIV_SYNC_DEBUG - ,file_name, line - #endif - ); + 
state = rw_lock_x_lock_low(lock, pass, file_name, line); mutex_exit(&(lock->mutex)); @@ -469,6 +462,7 @@ lock_loop: os_thread_yield(); } } else { + i = 0; /* Eliminate a compiler warning */ ut_error; } @@ -476,7 +470,7 @@ lock_loop: printf( "Thread %lu spin wait rw-x-lock at %lx cfile %s cline %lu rnds %lu\n", os_thread_get_curr_id(), (ulint)lock, - &(lock->cfile_name), lock->cline, i); + lock->cfile_name, lock->cline, i); } rw_x_spin_wait_count++; @@ -486,11 +480,7 @@ lock_loop: mutex_enter(rw_lock_get_mutex(lock)); - state = rw_lock_x_lock_low(lock, pass - #ifdef UNIV_SYNC_DEBUG - ,file_name, line - #endif - ); + state = rw_lock_x_lock_low(lock, pass, file_name, line); if (state == RW_LOCK_EX) { mutex_exit(rw_lock_get_mutex(lock)); @@ -502,9 +492,7 @@ lock_loop: sync_array_reserve_cell(sync_primary_wait_array, lock, RW_LOCK_EX, - #ifdef UNIV_SYNC_DEBUG file_name, line, - #endif &index); rw_lock_set_waiters(lock, 1); @@ -514,11 +502,12 @@ lock_loop: if (srv_print_latch_waits) { printf( "Thread %lu OS wait for rw-x-lock at %lx cfile %s cline %lu\n", - os_thread_get_curr_id(), (ulint)lock, &(lock->cfile_name), + os_thread_get_curr_id(), (ulint)lock, lock->cfile_name, lock->cline); } rw_x_system_call_count++; + rw_x_os_wait_count++; sync_array_wait_event(sync_primary_wait_array, index); @@ -537,8 +526,8 @@ rw_lock_debug_mutex_enter(void) /*==========================*/ { loop: - if (0 == mutex_enter_nowait(&rw_lock_debug_mutex)) { - + if (0 == mutex_enter_nowait(&rw_lock_debug_mutex, + IB__FILE__, __LINE__)) { return; } @@ -546,8 +535,8 @@ loop: rw_lock_debug_waiters = TRUE; - if (0 == mutex_enter_nowait(&rw_lock_debug_mutex)) { - + if (0 == mutex_enter_nowait(&rw_lock_debug_mutex, + IB__FILE__, __LINE__)) { return; } @@ -747,8 +736,6 @@ rw_lock_list_print_info(void) /*=========================*/ { #ifndef UNIV_SYNC_DEBUG - printf( - "Sorry, cannot give rw-lock list info in non-debug version!\n"); #else rw_lock_t* lock; ulint count = 0; @@ -756,8 +743,9 @@ 
rw_lock_list_print_info(void) mutex_enter(&rw_lock_list_mutex); - printf("----------------------------------------------\n"); - printf("RW-LOCK INFO\n"); + printf("-------------\n"); + printf("RW-LATCH INFO\n"); + printf("-------------\n"); lock = UT_LIST_GET_FIRST(rw_lock_list); @@ -810,9 +798,9 @@ rw_lock_print( ulint count = 0; rw_lock_debug_t* info; - printf("-------------------------------------------------\n"); - printf("RW-LOCK INFO\n"); - printf("RW-LOCK: %lx ", (ulint)lock); + printf("-------------\n"); + printf("RW-LATCH INFO\n"); + printf("RW-LATCH: %lx ", (ulint)lock); if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) || (rw_lock_get_reader_count(lock) != 0) diff --git a/innobase/sync/sync0sync.c b/innobase/sync/sync0sync.c index f0dbe145098..8b2a39e15eb 100644 --- a/innobase/sync/sync0sync.c +++ b/innobase/sync/sync0sync.c @@ -119,6 +119,7 @@ ulint mutex_system_call_count = 0; ulint mutex_spin_round_count = 0; ulint mutex_spin_wait_count = 0; +ulint mutex_os_wait_count = 0; ulint mutex_exit_count = 0; /* The global array of wait cells for implementation of the database's own @@ -228,12 +229,10 @@ mutex_create_func( mutex_set_waiters(mutex, 0); mutex->magic_n = MUTEX_MAGIC_N; mutex->line = 0; - mutex->file_name = "FILE NOT KNOWN"; + mutex->file_name = "not yet reserved"; mutex->thread_id = ULINT_UNDEFINED; mutex->level = SYNC_LEVEL_NONE; - ut_memcpy(&(mutex->cfile_name), cfile_name, - ut_min(MUTEX_CNAME_LEN - 1, ut_strlen(cfile_name))); - mutex->cfile_name[MUTEX_CNAME_LEN - 1] = '\0'; + mutex->cfile_name = cfile_name; mutex->cline = cline; /* Check that lock_word is aligned; this is important on Intel */ @@ -291,17 +290,23 @@ immediately, returns with return value 1. 
*/ ulint mutex_enter_nowait( /*===============*/ - /* out: 0 if succeed, 1 if not */ - mutex_t* mutex) /* in: pointer to mutex */ + /* out: 0 if succeed, 1 if not */ + mutex_t* mutex, /* in: pointer to mutex */ + char* file_name, /* in: file name where mutex + requested */ + ulint line) /* in: line where requested */ { ut_ad(mutex_validate(mutex)); if (!mutex_test_and_set(mutex)) { #ifdef UNIV_SYNC_DEBUG - mutex_set_debug_info(mutex, IB__FILE__, __LINE__); + mutex_set_debug_info(mutex, file_name, line); #endif + mutex->file_name = file_name; + mutex->line = line; + return(0); /* Succeeded! */ } @@ -349,13 +354,9 @@ for the mutex before suspending the thread. */ void mutex_spin_wait( /*============*/ - mutex_t* mutex /* in: pointer to mutex */ - - #ifdef UNIV_SYNC_DEBUG - ,char* file_name, /* in: file name where mutex requested */ - ulint line /* in: line where requested */ - #endif -) + mutex_t* mutex, /* in: pointer to mutex */ + char* file_name, /* in: file name where mutex requested */ + ulint line) /* in: line where requested */ { ulint index; /* index of the reserved wait cell */ ulint i; /* spin round count */ @@ -391,7 +392,7 @@ spin_loop: if (srv_print_latch_waits) { printf( "Thread %lu spin wait mutex at %lx cfile %s cline %lu rnds %lu\n", - os_thread_get_curr_id(), (ulint)mutex, &(mutex->cfile_name), + os_thread_get_curr_id(), (ulint)mutex, mutex->cfile_name, mutex->cline, i); } @@ -404,6 +405,9 @@ spin_loop: mutex_set_debug_info(mutex, file_name, line); #endif + mutex->file_name = file_name; + mutex->line = line; + return; } @@ -423,9 +427,7 @@ spin_loop: sync_array_reserve_cell(sync_primary_wait_array, mutex, SYNC_MUTEX, - #ifdef UNIV_SYNC_DEBUG file_name, line, - #endif &index); mutex_system_call_count++; @@ -438,7 +440,9 @@ spin_loop: mutex_set_waiters(mutex, 1); - if (mutex_test_and_set(mutex) == 0) { + /* Try to reserve still a few times */ + for (i = 0; i < 4; i++) { + if (mutex_test_and_set(mutex) == 0) { /* Succeeded! 
Free the reserved wait cell */ @@ -448,6 +452,9 @@ spin_loop: mutex_set_debug_info(mutex, file_name, line); #endif + mutex->file_name = file_name; + mutex->line = line; + if (srv_print_latch_waits) { printf( "Thread %lu spin wait succeeds at 2: mutex at %lx\n", @@ -459,6 +466,7 @@ spin_loop: /* Note that in this case we leave the waiters field set to 1. We cannot reset it to zero, as we do not know if there are other waiters. */ + } } /* Now we know that there has been some thread holding the mutex @@ -468,11 +476,13 @@ spin_loop: if (srv_print_latch_waits) { printf( "Thread %lu OS wait mutex at %lx cfile %s cline %lu rnds %lu\n", - os_thread_get_curr_id(), (ulint)mutex, &(mutex->cfile_name), + os_thread_get_curr_id(), (ulint)mutex, mutex->cfile_name, mutex->cline, i); } mutex_system_call_count++; + mutex_os_wait_count++; + sync_array_wait_event(sync_primary_wait_array, index); goto mutex_loop; @@ -578,7 +588,6 @@ mutex_list_print_info(void) /*=======================*/ { #ifndef UNIV_SYNC_DEBUG - printf("Sorry, cannot give mutex list info in non-debug version!\n"); #else mutex_t* mutex; char* file_name; @@ -586,8 +595,9 @@ mutex_list_print_info(void) os_thread_id_t thread_id; ulint count = 0; - printf("-----------------------------------------------\n"); + printf("----------\n"); printf("MUTEX INFO\n"); + printf("----------\n"); mutex_enter(&mutex_list_mutex); @@ -597,10 +607,10 @@ mutex_list_print_info(void) count++; if (mutex_get_lock_word(mutex) != 0) { - - mutex_get_debug_info(mutex, &file_name, &line, &thread_id); - - printf("Locked mutex: addr %lx thread %ld file %s line %ld\n", + mutex_get_debug_info(mutex, &file_name, &line, + &thread_id); + printf( + "Locked mutex: addr %lx thread %ld file %s line %ld\n", (ulint)mutex, thread_id, file_name, line); } @@ -791,7 +801,7 @@ sync_thread_levels_g( limit, slot->level); if (mutex->magic_n == MUTEX_MAGIC_N) { - printf("Mutex created at %s %lu\n", &(mutex->cfile_name), + printf("Mutex created at %s %lu\n", 
mutex->cfile_name, mutex->cline); if (mutex_get_lock_word(mutex) != 0) { @@ -890,6 +900,7 @@ sync_thread_levels_empty_gen( if (slot->latch != NULL && (!dict_mutex_allowed || (slot->level != SYNC_DICT + && slot->level != SYNC_FOREIGN_KEY_CHECK && slot->level != SYNC_PURGE_IS_RUNNING))) { lock = slot->latch; @@ -993,6 +1004,8 @@ sync_thread_add_level( ut_a(sync_thread_levels_g(array, SYNC_RECV)); } else if (level == SYNC_LOG) { ut_a(sync_thread_levels_g(array, SYNC_LOG)); + } else if (level == SYNC_THR_LOCAL) { + ut_a(sync_thread_levels_g(array, SYNC_THR_LOCAL)); } else if (level == SYNC_ANY_LATCH) { ut_a(sync_thread_levels_g(array, SYNC_ANY_LATCH)); } else if (level == SYNC_TRX_SYS_HEADER) { @@ -1071,6 +1084,8 @@ sync_thread_add_level( SYNC_IBUF_PESS_INSERT_MUTEX)); } else if (level == SYNC_DICT_AUTOINC_MUTEX) { ut_a(sync_thread_levels_g(array, SYNC_DICT_AUTOINC_MUTEX)); + } else if (level == SYNC_FOREIGN_KEY_CHECK) { + ut_a(sync_thread_levels_g(array, SYNC_FOREIGN_KEY_CHECK)); } else if (level == SYNC_DICT_HEADER) { ut_a(sync_thread_levels_g(array, SYNC_DICT_HEADER)); } else if (level == SYNC_PURGE_IS_RUNNING) { @@ -1231,15 +1246,17 @@ void sync_print_wait_info(void) /*======================*/ { +#ifdef UNIV_SYNC_DEBUG + printf("Mutex exits %lu, rws exits %lu, rwx exits %lu\n", + mutex_exit_count, rw_s_exit_count, rw_x_exit_count); +#endif printf( - "Mut ex %lu sp %lu r %lu sys %lu; rws %lu %lu %lu; rwx %lu %lu %lu\n", - mutex_exit_count, +"Mutex spin waits %lu, rounds %lu, OS waits %lu\n" +"RW-shared spins %lu, OS waits %lu; RW-excl spins %lu, OS waits %lu\n", mutex_spin_wait_count, mutex_spin_round_count, - mutex_system_call_count, - rw_s_exit_count, - rw_s_spin_wait_count, rw_s_system_call_count, - rw_x_exit_count, - rw_x_spin_wait_count, rw_x_system_call_count); + mutex_os_wait_count, + rw_s_spin_wait_count, rw_s_os_wait_count, + rw_x_spin_wait_count, rw_x_os_wait_count); } /*********************************************************************** @@ -1249,10 
+1266,8 @@ void sync_print(void) /*============*/ { - printf("SYNC INFO:\n"); mutex_list_print_info(); rw_lock_list_print_info(); sync_array_print_info(sync_primary_wait_array); sync_print_wait_info(); - printf("-----------------------------------------------------\n"); } diff --git a/innobase/thr/thr0loc.c b/innobase/thr/thr0loc.c index 897e53557c3..d3d7a58d313 100644 --- a/innobase/thr/thr0loc.c +++ b/innobase/thr/thr0loc.c @@ -224,5 +224,5 @@ thr_local_init(void) thr_local_hash = hash_create(OS_THREAD_MAX_N + 100); mutex_create(&thr_local_mutex); - mutex_set_level(&thr_local_mutex, SYNC_ANY_LATCH); + mutex_set_level(&thr_local_mutex, SYNC_THR_LOCAL); } diff --git a/innobase/trx/trx0purge.c b/innobase/trx/trx0purge.c index afb83926fa3..c50ffb65e00 100644 --- a/innobase/trx/trx0purge.c +++ b/innobase/trx/trx0purge.c @@ -276,6 +276,12 @@ trx_purge_add_update_undo_to_history( if (undo->state != TRX_UNDO_CACHED) { /* The undo log segment will not be reused */ + if (undo->id >= TRX_RSEG_N_SLOTS) { + fprintf(stderr, + "InnoDB: Error: undo->id is %lu\n", undo->id); + ut_a(0); + } + trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL, mtr); hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE, diff --git a/innobase/trx/trx0rec.c b/innobase/trx/trx0rec.c index 64febb8f523..73153cfaa37 100644 --- a/innobase/trx/trx0rec.c +++ b/innobase/trx/trx0rec.c @@ -800,7 +800,7 @@ trx_undo_update_rec_get_update( TRX_UNDO_DEL_MARK_REC; in the last case, only trx id and roll ptr fields are added to the update vector */ - dulint trx_id, /* in: transaction id from this undorecord */ + dulint trx_id, /* in: transaction id from this undo record */ dulint roll_ptr,/* in: roll pointer from this undo record */ ulint info_bits,/* in: info bits from this undo record */ mem_heap_t* heap, /* in: memory heap from which the memory @@ -1078,9 +1078,7 @@ trx_undo_report_row_operation( undo_page = buf_page_get_gen(undo->space, page_no, RW_X_LATCH, undo->guess_page, BUF_GET, - #ifdef 
UNIV_SYNC_DEBUG IB__FILE__, __LINE__, - #endif &mtr); buf_page_dbg_add_level(undo_page, SYNC_TRX_UNDO_PAGE); @@ -1282,7 +1280,7 @@ trx_undo_prev_version_build( return(DB_SUCCESS); } - rec_trx_id = row_get_rec_trx_id(rec, index); + rec_trx_id = row_get_rec_trx_id(rec, index); err = trx_undo_get_undo_rec(roll_ptr, rec_trx_id, &undo_rec, heap); diff --git a/innobase/trx/trx0trx.c b/innobase/trx/trx0trx.c index 5d8c57edf34..13b37775dce 100644 --- a/innobase/trx/trx0trx.c +++ b/innobase/trx/trx0trx.c @@ -109,7 +109,10 @@ trx_create( UT_LIST_INIT(trx->trx_locks); trx->has_search_latch = FALSE; + trx->search_latch_timeout = BTR_SEA_TIMEOUT; + trx->auto_inc_lock = NULL; + trx->read_view_heap = mem_heap_create(256); trx->read_view = NULL; @@ -193,6 +196,7 @@ trx_free( ut_a(UT_LIST_GET_LEN(trx->wait_thrs) == 0); ut_a(!trx->has_search_latch); + ut_a(!trx->auto_inc_lock); if (trx->lock_heap) { mem_heap_free(trx->lock_heap); diff --git a/innobase/trx/trx0undo.c b/innobase/trx/trx0undo.c index 1f408428582..598090bdee2 100644 --- a/innobase/trx/trx0undo.c +++ b/innobase/trx/trx0undo.c @@ -361,6 +361,8 @@ trx_undo_page_init( mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE); + fil_page_set_type(undo_page, FIL_PAGE_UNDO_LOG); + trx_undo_page_init_log(undo_page, type, mtr); } @@ -1106,6 +1108,12 @@ trx_undo_mem_create_at_db_start( page_t* last_page; trx_undo_rec_t* rec; + if (id >= TRX_RSEG_N_SLOTS) { + fprintf(stderr, + "InnoDB: Error: undo->id is %lu\n", id); + ut_a(0); + } + undo_page = trx_undo_page_get(rseg->space, page_no, mtr); page_header = undo_page + TRX_UNDO_PAGE_HDR; @@ -1251,7 +1259,13 @@ trx_undo_mem_create( trx_undo_t* undo; ut_ad(mutex_own(&(rseg->mutex))); - + + if (id >= TRX_RSEG_N_SLOTS) { + fprintf(stderr, + "InnoDB: Error: undo->id is %lu\n", id); + ut_a(0); + } + undo = mem_alloc(sizeof(trx_undo_t)); undo->id = id; @@ -1290,6 +1304,12 @@ trx_undo_mem_init_for_reuse( { ut_ad(mutex_own(&((undo->rseg)->mutex))); + if 
(undo->id >= TRX_RSEG_N_SLOTS) { + fprintf(stderr, + "InnoDB: Error: undo->id is %lu\n", undo->id); + ut_a(0); + } + undo->state = TRX_UNDO_ACTIVE; undo->del_marks = FALSE; undo->trx_id = trx_id; @@ -1308,6 +1328,12 @@ trx_undo_mem_free( /*==============*/ trx_undo_t* undo) /* in: the undo object to be freed */ { + if (undo->id >= TRX_RSEG_N_SLOTS) { + fprintf(stderr, + "InnoDB: Error: undo->id is %lu\n", undo->id); + ut_a(0); + } + mem_free(undo); } @@ -1493,6 +1519,9 @@ trx_undo_assign_undo( mutex_exit(&(rseg->mutex)); mtr_commit(&mtr); + fprintf(stderr, "InnoDB: no undo log slots free\n"); + ut_a(0); + return(NULL); } } @@ -1536,6 +1565,12 @@ trx_undo_set_state_at_finish( ut_ad(trx && undo && mtr); + if (undo->id >= TRX_RSEG_N_SLOTS) { + fprintf(stderr, + "InnoDB: Error: undo->id is %lu\n", undo->id); + ut_a(0); + } + undo_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr); seg_hdr = undo_page + TRX_UNDO_SEG_HDR; diff --git a/innobase/ut/ut0mem.c b/innobase/ut/ut0mem.c index ebeefe0c297..630bd3a9b71 100644 --- a/innobase/ut/ut0mem.c +++ b/innobase/ut/ut0mem.c @@ -13,15 +13,22 @@ Created 5/11/1994 Heikki Tuuri #endif #include "mem0mem.h" - +#include "os0sync.h" /* This struct is placed first in every allocated memory block */ typedef struct ut_mem_block_struct ut_mem_block_t; +/* The total amount of memory currently allocated from the OS with malloc */ +ulint ut_total_allocated_memory = 0; + struct ut_mem_block_struct{ - UT_LIST_NODE_T(ut_mem_block_t) mem_block_list;/* mem block list node */ + UT_LIST_NODE_T(ut_mem_block_t) mem_block_list; + /* mem block list node */ + ulint size; /* size of allocated memory */ + ulint magic_n; }; +#define UT_MEM_MAGIC_N 1601650166 /* List of all memory blocks allocated from the operating system with malloc */ @@ -70,16 +77,17 @@ ut_malloc_low( if (ret == NULL) { fprintf(stderr, "InnoDB: Fatal error: cannot allocate %lu bytes of\n" - "InnoDB: memory with malloc!\n" - "InnoDB: Operating system errno: %lu\n" + "InnoDB: 
memory with malloc! Total allocated memory\n" + "InnoDB: by InnoDB %lu bytes. Operating system errno: %lu\n" "InnoDB: Cannot continue operation!\n" "InnoDB: Check if you should increase the swap file or\n" - "InnoDB: ulimits of your operating system.\n", n, errno); + "InnoDB: ulimits of your operating system.\n", + n, ut_total_allocated_memory, errno); os_fast_mutex_unlock(&ut_list_mutex); exit(1); - } + } if (set_to_zero) { #ifdef UNIV_SET_MEM_TO_ZERO @@ -87,6 +95,11 @@ ut_malloc_low( #endif } + ((ut_mem_block_t*)ret)->size = n + sizeof(ut_mem_block_t); + ((ut_mem_block_t*)ret)->magic_n = UT_MEM_MAGIC_N; + + ut_total_allocated_memory += n + sizeof(ut_mem_block_t); + UT_LIST_ADD_FIRST(mem_block_list, ut_mem_block_list, ((ut_mem_block_t*)ret)); os_fast_mutex_unlock(&ut_list_mutex); @@ -107,7 +120,7 @@ ut_malloc( return(ut_malloc_low(n, TRUE)); } /************************************************************************** -Frees a memory bloock allocated with ut_malloc. */ +Frees a memory block allocated with ut_malloc. 
*/ void ut_free( @@ -120,6 +133,11 @@ ut_free( os_fast_mutex_lock(&ut_list_mutex); + ut_a(block->magic_n == UT_MEM_MAGIC_N); + ut_a(ut_total_allocated_memory >= block->size); + + ut_total_allocated_memory -= block->size; + UT_LIST_REMOVE(mem_block_list, ut_mem_block_list, block); free(block); @@ -139,11 +157,18 @@ ut_free_all_mem(void) while (block = UT_LIST_GET_FIRST(ut_mem_block_list)) { + ut_a(block->magic_n == UT_MEM_MAGIC_N); + ut_a(ut_total_allocated_memory >= block->size); + + ut_total_allocated_memory -= block->size; + UT_LIST_REMOVE(mem_block_list, ut_mem_block_list, block); free(block); } os_fast_mutex_unlock(&ut_list_mutex); + + ut_a(ut_total_allocated_memory == 0); } /************************************************************************** diff --git a/innobase/ut/ut0ut.c b/innobase/ut/ut0ut.c index 1436f6a10a3..964d5bca567 100644 --- a/innobase/ut/ut0ut.c +++ b/innobase/ut/ut0ut.c @@ -187,6 +187,8 @@ ut_sprintf_buf( for (i = 0; i < len; i++) { if (isprint((char)(*data))) { n += sprintf(str + n, "%c", (char)*data); + } else { + n += sprintf(str + n, "."); } data++; |