diff options
author | Ignacio Galarza <iggy@mysql.com> | 2009-03-19 09:44:58 -0400 |
---|---|---|
committer | Ignacio Galarza <iggy@mysql.com> | 2009-03-19 09:44:58 -0400 |
commit | 675c3ce2bbe7f033930822dd0ef9126ebb25d44b (patch) | |
tree | 0d1697348f5ffacdf22ddce0537910d41d6482f0 /innobase | |
parent | 54fbbf9591e21cda9f7b26c2d795d88f51827f07 (diff) | |
parent | 0d18d930aed47ed97ef3309f9510cfd7a366202e (diff) | |
download | mariadb-git-675c3ce2bbe7f033930822dd0ef9126ebb25d44b.tar.gz |
auto-merge
Diffstat (limited to 'innobase')
-rw-r--r-- | innobase/buf/buf0lru.c | 127 | ||||
-rw-r--r-- | innobase/dict/dict0crea.c | 3 | ||||
-rw-r--r-- | innobase/include/db0err.h | 5 | ||||
-rw-r--r-- | innobase/include/rem0rec.h | 11 | ||||
-rw-r--r-- | innobase/include/rem0rec.ic | 19 | ||||
-rw-r--r-- | innobase/include/trx0undo.h | 13 | ||||
-rw-r--r-- | innobase/row/row0mysql.c | 3 | ||||
-rw-r--r-- | innobase/trx/trx0rec.c | 15 | ||||
-rw-r--r-- | innobase/trx/trx0undo.c | 105 |
9 files changed, 229 insertions, 72 deletions
diff --git a/innobase/buf/buf0lru.c b/innobase/buf/buf0lru.c index 0f632f0752a..6984c196701 100644 --- a/innobase/buf/buf0lru.c +++ b/innobase/buf/buf0lru.c @@ -42,6 +42,11 @@ initial segment in buf_LRU_get_recent_limit */ #define BUF_LRU_INITIAL_RATIO 8 +/* When dropping the search hash index entries before deleting an ibd +file, we build a local array of pages belonging to that tablespace +in the buffer pool. Following is the size of that array. */ +#define BUF_LRU_DROP_SEARCH_HASH_SIZE 1024 + /* If we switch on the InnoDB monitor because there are too few available frames in the buffer pool, we set this to TRUE */ ibool buf_lru_switched_on_innodb_mon = FALSE; @@ -66,6 +71,120 @@ buf_LRU_block_free_hashed_page( be in a state where it can be freed */ /********************************************************************** +Attempts to drop page hash index on a batch of pages belonging to a +particular space id. */ +static +void +buf_LRU_drop_page_hash_batch( +/*=========================*/ + ulint id, /* in: space id */ + const ulint* arr, /* in: array of page_no */ + ulint count) /* in: number of entries in array */ +{ + ulint i; + + ut_ad(arr != NULL); + ut_ad(count <= BUF_LRU_DROP_SEARCH_HASH_SIZE); + + for (i = 0; i < count; ++i) { + btr_search_drop_page_hash_when_freed(id, arr[i]); + } +} + +/********************************************************************** +When doing a DROP TABLE/DISCARD TABLESPACE we have to drop all page +hash index entries belonging to that table. This function tries to +do that in batch. Note that this is a 'best effort' attempt and does +not guarantee that ALL hash entries will be removed. */ +static +void +buf_LRU_drop_page_hash_for_tablespace( +/*==================================*/ + ulint id) /* in: space id */ +{ + buf_block_t* block; + ulint* page_arr; + ulint num_entries; + + page_arr = ut_malloc(sizeof(ulint) + * BUF_LRU_DROP_SEARCH_HASH_SIZE); + mutex_enter(&buf_pool->mutex); + +scan_again: + num_entries = 0; + block = UT_LIST_GET_LAST(buf_pool->LRU); + + while (block != NULL) { + buf_block_t* prev_block; + + mutex_enter(&block->mutex); + prev_block = UT_LIST_GET_PREV(LRU, block); + + ut_a(block->state == BUF_BLOCK_FILE_PAGE); + + if (block->space != id + || block->buf_fix_count > 0 + || block->io_fix != 0) { + /* We leave the fixed pages as is in this scan. + To be dealt with later in the final scan. */ + mutex_exit(&block->mutex); + goto next_page; + } + + ut_ad(block->space == id); + if (block->is_hashed) { + + /* Store the offset(i.e.: page_no) in the array + so that we can drop hash index in a batch + later. */ + page_arr[num_entries] = block->offset; + mutex_exit(&block->mutex); + ut_a(num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE); + ++num_entries; + + if (num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE) { + goto next_page; + } + /* Array full. We release the buf_pool->mutex to + obey the latching order. */ + mutex_exit(&buf_pool->mutex); + + buf_LRU_drop_page_hash_batch(id, page_arr, + num_entries); + num_entries = 0; + mutex_enter(&buf_pool->mutex); + } else { + mutex_exit(&block->mutex); + } + +next_page: + /* Note that we may have released the buf_pool->mutex + above after reading the prev_block during processing + of a page_hash_batch (i.e.: when the array was full). + This means that prev_block can change in LRU list. + This is OK because this function is a 'best effort' + to drop as many search hash entries as possible and + it does not guarantee that ALL such entries will be + dropped. */ + block = prev_block; + + /* If, however, block has been removed from LRU list + to the free list then we should restart the scan. + block->state is protected by buf_pool->mutex. */ + if (block && block->state != BUF_BLOCK_FILE_PAGE) { + ut_a(num_entries == 0); + goto scan_again; + } + } + + mutex_exit(&buf_pool->mutex); + + /* Drop any remaining batch of search hashed pages. */ + buf_LRU_drop_page_hash_batch(id, page_arr, num_entries); + ut_free(page_arr); +} + +/********************************************************************** Invalidates all pages belonging to a given tablespace when we are deleting the data file(s) of that tablespace. */ @@ -78,6 +197,14 @@ buf_LRU_invalidate_tablespace( ulint page_no; ibool all_freed; + /* Before we attempt to drop pages one by one we first + attempt to drop page hash index entries in batches to make + it more efficient. The batching attempt is a best effort + attempt and does not guarantee that all pages hash entries + will be dropped. We get rid of remaining page hash entries + one by one below. */ + buf_LRU_drop_page_hash_for_tablespace(id); + scan_again: mutex_enter(&(buf_pool->mutex)); diff --git a/innobase/dict/dict0crea.c b/innobase/dict/dict0crea.c index e20d8b6e83a..12d99734796 100644 --- a/innobase/dict/dict0crea.c +++ b/innobase/dict/dict0crea.c @@ -1249,7 +1249,8 @@ dict_create_or_check_foreign_constraint_tables(void) fprintf(stderr, "InnoDB: error %lu in creation\n", (ulong) error); - ut_a(error == DB_OUT_OF_FILE_SPACE); + ut_a(error == DB_OUT_OF_FILE_SPACE + || error == DB_TOO_MANY_CONCURRENT_TRXS); fprintf(stderr, "InnoDB: creation failed\n"); fprintf(stderr, "InnoDB: tablespace is full\n"); diff --git a/innobase/include/db0err.h b/innobase/include/db0err.h index 247c5de67db..68bdcdc8b7f 100644 --- a/innobase/include/db0err.h +++ b/innobase/include/db0err.h @@ -70,6 +70,11 @@ Created 5/24/1996 Heikki Tuuri work with e.g., FT indexes created by a later version of the engine. */ +#define DB_TOO_MANY_CONCURRENT_TRXS 47 /* when InnoDB runs out of the + preconfigured undo slots, this can + only happen when there are too many + concurrent transactions */ + /* The following are partial failure codes */ #define DB_FAIL 1000 #define DB_OVERFLOW 1001 diff --git a/innobase/include/rem0rec.h b/innobase/include/rem0rec.h index 69b397c9682..b573c5d4c3b 100644 --- a/innobase/include/rem0rec.h +++ b/innobase/include/rem0rec.h @@ -368,8 +368,9 @@ rec_set_field_extern_bits( /*************************************************************** This is used to modify the value of an already existing field in a record. The previous value must have exactly the same size as the new value. If len -is UNIV_SQL_NULL then the field is treated as an SQL null for old-style -records. For new-style records, len must not be UNIV_SQL_NULL. */ +is UNIV_SQL_NULL then the field is treated as an SQL null. +For records in ROW_FORMAT=COMPACT (new-style records), len must not be +UNIV_SQL_NULL unless the field already is SQL null. */ UNIV_INLINE void rec_set_nth_field( @@ -378,11 +379,7 @@ rec_set_nth_field( const ulint* offsets,/* in: array returned by rec_get_offsets() */ ulint n, /* in: index number of the field */ const void* data, /* in: pointer to the data if not SQL null */ - ulint len); /* in: length of the data or UNIV_SQL_NULL. - If not SQL null, must have the same - length as the previous value. - If SQL null, previous value must be - SQL null. */ + ulint len); /* in: length of the data or UNIV_SQL_NULL */ /************************************************************** The following function returns the data size of an old-style physical record, that is the sum of field lengths. SQL null fields diff --git a/innobase/include/rem0rec.ic b/innobase/include/rem0rec.ic index 1abbb503bab..64c91724386 100644 --- a/innobase/include/rem0rec.ic +++ b/innobase/include/rem0rec.ic @@ -1204,8 +1204,9 @@ rec_get_nth_field_size( /*************************************************************** This is used to modify the value of an already existing field in a record. The previous value must have exactly the same size as the new value. If len -is UNIV_SQL_NULL then the field is treated as an SQL null for old-style -records. For new-style records, len must not be UNIV_SQL_NULL. */ +is UNIV_SQL_NULL then the field is treated as an SQL null. +For records in ROW_FORMAT=COMPACT (new-style records), len must not be +UNIV_SQL_NULL unless the field already is SQL null. */ UNIV_INLINE void rec_set_nth_field( @@ -1215,11 +1216,7 @@ rec_set_nth_field( ulint n, /* in: index number of the field */ const void* data, /* in: pointer to the data if not SQL null */ - ulint len) /* in: length of the data or UNIV_SQL_NULL. - If not SQL null, must have the same - length as the previous value. - If SQL null, previous value must be - SQL null. */ + ulint len) /* in: length of the data or UNIV_SQL_NULL */ { byte* data2; ulint len2; @@ -1227,9 +1224,11 @@ rec_set_nth_field( ut_ad(rec); ut_ad(rec_offs_validate(rec, NULL, offsets)); - if (len == UNIV_SQL_NULL) { - ut_ad(!rec_offs_comp(offsets)); - rec_set_nth_field_sql_null(rec, n); + if (UNIV_UNLIKELY(len == UNIV_SQL_NULL)) { + if (!rec_offs_nth_sql_null(offsets, n)) { + ut_a(!rec_offs_comp(offsets)); + rec_set_nth_field_sql_null(rec, n); + } return; } diff --git a/innobase/include/trx0undo.h b/innobase/include/trx0undo.h index 4f1847aa88c..152a09c0f76 100644 --- a/innobase/include/trx0undo.h +++ b/innobase/include/trx0undo.h @@ -222,13 +222,16 @@ trx_undo_lists_init( Assigns an undo log for a transaction. A new undo log is created or a cached undo log reused. */ -trx_undo_t* +ulint trx_undo_assign_undo( /*=================*/ - /* out: the undo log, NULL if did not succeed: out of - space */ - trx_t* trx, /* in: transaction */ - ulint type); /* in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */ + /* out: DB_SUCCESS if undo log assign + * successful, possible error codes are: + * ER_TOO_MANY_CONCURRENT_TRXS + * DB_OUT_OF_FILE_SPAC + * DB_OUT_OF_MEMORY */ + trx_t* trx, /* in: transaction */ + ulint type); /* in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */ /********************************************************************** Sets the state of the undo log segment at a transaction finish. */ diff --git a/innobase/row/row0mysql.c b/innobase/row/row0mysql.c index 4bc5f39359c..d7213b25145 100644 --- a/innobase/row/row0mysql.c +++ b/innobase/row/row0mysql.c @@ -494,7 +494,8 @@ handle_new_error: /* MySQL will roll back the latest SQL statement */ } else if (err == DB_ROW_IS_REFERENCED || err == DB_NO_REFERENCED_ROW - || err == DB_CANNOT_ADD_CONSTRAINT) { + || err == DB_CANNOT_ADD_CONSTRAINT + || err == DB_TOO_MANY_CONCURRENT_TRXS) { if (savept) { /* Roll back the latest, possibly incomplete insertion or update */ diff --git a/innobase/trx/trx0rec.c b/innobase/trx/trx0rec.c index 3b7171e6038..44b734625dd 100644 --- a/innobase/trx/trx0rec.c +++ b/innobase/trx/trx0rec.c @@ -1013,6 +1013,7 @@ trx_undo_report_row_operation( ibool is_insert; trx_rseg_t* rseg; mtr_t mtr; + ulint err = DB_SUCCESS; mem_heap_t* heap = NULL; ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint* offsets = offsets_; @@ -1024,7 +1025,7 @@ trx_undo_report_row_operation( *roll_ptr = ut_dulint_zero; - return(DB_SUCCESS); + return(err); } ut_ad(thr); @@ -1042,7 +1043,7 @@ trx_undo_report_row_operation( if (trx->insert_undo == NULL) { - trx_undo_assign_undo(trx, TRX_UNDO_INSERT); + err = trx_undo_assign_undo(trx, TRX_UNDO_INSERT); } undo = trx->insert_undo; @@ -1052,7 +1053,7 @@ trx_undo_report_row_operation( if (trx->update_undo == NULL) { - trx_undo_assign_undo(trx, TRX_UNDO_UPDATE); + err = trx_undo_assign_undo(trx, TRX_UNDO_UPDATE); } @@ -1060,11 +1061,11 @@ trx_undo_report_row_operation( is_insert = FALSE; } - if (undo == NULL) { - /* Did not succeed: out of space */ + if (err != DB_SUCCESS) { + /* Did not succeed: return the error encountered */ mutex_exit(&(trx->undo_mutex)); - return(DB_OUT_OF_FILE_SPACE); + return(err); } page_no = undo->last_page_no; @@ -1154,7 +1155,7 @@ trx_undo_report_row_operation( if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } - return(DB_SUCCESS); + return(err); } /*============== BUILDING PREVIOUS VERSION OF A RECORD ===============*/ diff --git a/innobase/trx/trx0undo.c b/innobase/trx/trx0undo.c index 251cd355897..997f25a66d8 100644 --- a/innobase/trx/trx0undo.c +++ b/innobase/trx/trx0undo.c @@ -374,27 +374,32 @@ trx_undo_page_init( /******************************************************************* Creates a new undo log segment in file. */ static -page_t* +ulint trx_undo_seg_create( /*================*/ - /* out: segment header page x-latched, NULL - if no space left */ + /* out: DB_SUCCESS if page creation OK + possible error codes are: + DB_TOO_MANY_CONCURRENT_TRXS + DB_OUT_OF_FILE_SPACE */ trx_rseg_t* rseg __attribute__((unused)),/* in: rollback segment */ trx_rsegf_t* rseg_hdr,/* in: rollback segment header, page x-latched */ ulint type, /* in: type of the segment: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */ ulint* id, /* out: slot index within rseg header */ + page_t** undo_page, + /* out: segment header page x-latched, NULL + if there was an error */ mtr_t* mtr) /* in: mtr */ { ulint slot_no; ulint space; - page_t* undo_page; trx_upagef_t* page_hdr; trx_usegf_t* seg_hdr; ulint n_reserved; ibool success; - + ulint err = DB_SUCCESS; + ut_ad(mtr && id && rseg_hdr); #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&(rseg->mutex))); @@ -411,7 +416,7 @@ trx_undo_seg_create( "InnoDB: Warning: cannot find a free slot for an undo log. Do you have too\n" "InnoDB: many active transactions running concurrently?\n"); - return(NULL); + return(DB_TOO_MANY_CONCURRENT_TRXS); } space = buf_frame_get_space_id(rseg_hdr); @@ -420,29 +425,29 @@ trx_undo_seg_create( mtr); if (!success) { - return(NULL); + return(DB_OUT_OF_FILE_SPACE); } /* Allocate a new file segment for the undo log */ - undo_page = fseg_create_general(space, 0, + *undo_page = fseg_create_general(space, 0, TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER, TRUE, mtr); fil_space_release_free_extents(space, n_reserved); - if (undo_page == NULL) { + if (*undo_page == NULL) { /* No space left */ - return(NULL); + return(DB_OUT_OF_FILE_SPACE); } #ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(undo_page, SYNC_TRX_UNDO_PAGE); + buf_page_dbg_add_level(*undo_page, SYNC_TRX_UNDO_PAGE); #endif /* UNIV_SYNC_DEBUG */ - page_hdr = undo_page + TRX_UNDO_PAGE_HDR; - seg_hdr = undo_page + TRX_UNDO_SEG_HDR; + page_hdr = *undo_page + TRX_UNDO_PAGE_HDR; + seg_hdr = *undo_page + TRX_UNDO_SEG_HDR; - trx_undo_page_init(undo_page, type, mtr); + trx_undo_page_init(*undo_page, type, mtr); mlog_write_ulint(page_hdr + TRX_UNDO_PAGE_FREE, TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE, @@ -456,10 +461,11 @@ trx_undo_seg_create( page_hdr + TRX_UNDO_PAGE_NODE, mtr); trx_rsegf_set_nth_undo(rseg_hdr, slot_no, - buf_frame_get_page_no(undo_page), mtr); + buf_frame_get_page_no(*undo_page), mtr); + *id = slot_no; - return(undo_page); + return(err); } /************************************************************************** @@ -1400,6 +1406,11 @@ trx_undo_mem_create( undo = mem_alloc(sizeof(trx_undo_t)); + if (undo == NULL) { + + return NULL; + } + undo->id = id; undo->type = type; undo->state = TRX_UNDO_ACTIVE; @@ -1479,11 +1490,15 @@ trx_undo_mem_free( /************************************************************************** Creates a new undo log. */ static -trx_undo_t* +ulint trx_undo_create( /*============*/ - /* out: undo log object, NULL if did not - succeed: out of space */ + /* out: DB_SUCCESS if successful in creating + the new undo lob object, possible error + codes are: + DB_TOO_MANY_CONCURRENT_TRXS + DB_OUT_OF_FILE_SPACE + DB_OUT_OF_MEMORY*/ trx_t* trx, /* in: transaction */ trx_rseg_t* rseg, /* in: rollback segment memory copy */ ulint type, /* in: type of the log: TRX_UNDO_INSERT or @@ -1491,36 +1506,39 @@ trx_undo_create( dulint trx_id, /* in: id of the trx for which the undo log is created */ XID* xid, /* in: X/Open transaction identification*/ + trx_undo_t** undo, /* out: the new undo log object, undefined + * if did not succeed */ mtr_t* mtr) /* in: mtr */ { trx_rsegf_t* rseg_header; ulint page_no; ulint offset; ulint id; - trx_undo_t* undo; page_t* undo_page; - + ulint err; + #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&(rseg->mutex))); #endif /* UNIV_SYNC_DEBUG */ if (rseg->curr_size == rseg->max_size) { - return(NULL); + return(DB_OUT_OF_FILE_SPACE); } rseg->curr_size++; rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, mtr); - undo_page = trx_undo_seg_create(rseg, rseg_header, type, &id, mtr); + err = trx_undo_seg_create(rseg, rseg_header, type, &id, + &undo_page, mtr); - if (undo_page == NULL) { + if (err != DB_SUCCESS) { /* Did not succeed */ rseg->curr_size--; - return(NULL); + return(err); } page_no = buf_frame_get_page_no(undo_page); @@ -1532,9 +1550,14 @@ trx_undo_create( undo_page + offset, mtr); } - undo = trx_undo_mem_create(rseg, id, type, trx_id, xid, + *undo = trx_undo_mem_create(rseg, id, type, trx_id, xid, page_no, offset); - return(undo); + if (*undo == NULL) { + + err = DB_OUT_OF_MEMORY; + } + + return(err); } /*================ UNDO LOG ASSIGNMENT AND CLEANUP =====================*/ @@ -1653,17 +1676,20 @@ trx_undo_mark_as_dict_operation( Assigns an undo log for a transaction. A new undo log is created or a cached undo log reused. */ -trx_undo_t* +ulint trx_undo_assign_undo( /*=================*/ - /* out: the undo log, NULL if did not succeed: out of - space */ - trx_t* trx, /* in: transaction */ - ulint type) /* in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */ + /* out: DB_SUCCESS if undo log assign + successful, possible error codes are: + DD_TOO_MANY_CONCURRENT_TRXS + DB_OUT_OF_FILE_SPACE DB_OUT_OF_MEMORY*/ + trx_t* trx, /* in: transaction */ + ulint type) /* in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */ { trx_rseg_t* rseg; trx_undo_t* undo; mtr_t mtr; + ulint err = DB_SUCCESS; ut_ad(trx); ut_ad(trx->rseg); @@ -1684,15 +1710,11 @@ trx_undo_assign_undo( undo = trx_undo_reuse_cached(trx, rseg, type, trx->id, &trx->xid, &mtr); if (undo == NULL) { - undo = trx_undo_create(trx, rseg, type, trx->id, &trx->xid, - &mtr); - if (undo == NULL) { - /* Did not succeed */ + err = trx_undo_create(trx, rseg, type, trx->id, &trx->xid, + &undo, &mtr); + if (err != DB_SUCCESS) { - mutex_exit(&(rseg->mutex)); - mtr_commit(&mtr); - - return(NULL); + goto func_exit; } } @@ -1710,10 +1732,11 @@ trx_undo_assign_undo( trx_undo_mark_as_dict_operation(trx, undo, &mtr); } +func_exit: mutex_exit(&(rseg->mutex)); mtr_commit(&mtr); - return(undo); + return err; } /********************************************************************** |