diff options
author | unknown <knielsen@knielsen-hq.org> | 2010-01-15 16:58:25 +0100 |
---|---|---|
committer | unknown <knielsen@knielsen-hq.org> | 2010-01-15 16:58:25 +0100 |
commit | 13029651b5535d637aa6f652f17c8c4417a803bc (patch) | |
tree | b65c4d4bf3227b2e2b86c13f4d098b57e9a60410 /storage/xtradb/include | |
parent | d121e6630519a66eec7c953ee0eae623f592ce87 (diff) | |
parent | 5b0ab60a4bfb4d5b603cd929b7ceb0d9fd1bac22 (diff) | |
download | mariadb-git-13029651b5535d637aa6f652f17c8c4417a803bc.tar.gz |
Merge XtraDB 9 into MariaDB.
Diffstat (limited to 'storage/xtradb/include')
52 files changed, 729 insertions, 306 deletions
diff --git a/storage/xtradb/include/btr0cur.h b/storage/xtradb/include/btr0cur.h index b2d43ae3254..480a3877e54 100644 --- a/storage/xtradb/include/btr0cur.h +++ b/storage/xtradb/include/btr0cur.h @@ -618,7 +618,7 @@ enum btr_cur_method { hash_node, and might be necessary to update */ BTR_CUR_BINARY, /*!< success using the binary search */ - BTR_CUR_INSERT_TO_IBUF, /*!< performed the intended insert to + BTR_CUR_INSERT_TO_IBUF /*!< performed the intended insert to the insert buffer */ }; diff --git a/storage/xtradb/include/btr0sea.h b/storage/xtradb/include/btr0sea.h index 5c50829e874..7f4842d0897 100644 --- a/storage/xtradb/include/btr0sea.h +++ b/storage/xtradb/include/btr0sea.h @@ -41,6 +41,12 @@ void btr_search_sys_create( /*==================*/ ulint hash_size); /*!< in: hash index hash table size */ +/*****************************************************************//** +Frees the adaptive search system at a database shutdown. */ +UNIV_INTERN +void +btr_search_sys_free(void); +/*=====================*/ /********************************************************************//** Disable the adaptive hash search system and empty the index. */ diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h index 8063f9ec2c8..ddd08c49a12 100644 --- a/storage/xtradb/include/buf0buf.h +++ b/storage/xtradb/include/buf0buf.h @@ -346,7 +346,7 @@ buf_page_release( mtr_t* mtr); /*!< in: mtr */ /********************************************************************//** Moves a page to the start of the buffer pool LRU list. This high-level -function can be used to prevent an important page from from slipping out of +function can be used to prevent an important page from slipping out of the buffer pool. */ UNIV_INTERN void @@ -707,15 +707,6 @@ buf_page_belongs_to_unzip_LRU( /*==========================*/ const buf_page_t* bpage) /*!< in: pointer to control block */ __attribute__((pure)); -/*********************************************************************//** -Determine the approximate LRU list position of a block. -@return LRU list position */ -UNIV_INLINE -ulint -buf_page_get_LRU_position( -/*======================*/ - const buf_page_t* bpage) /*!< in: control block */ - __attribute__((pure)); /*********************************************************************//** Gets the mutex of a block. @@ -825,14 +816,14 @@ buf_page_set_old( buf_page_t* bpage, /*!< in/out: control block */ ibool old); /*!< in: old */ /*********************************************************************//** -Determine if a block has been accessed in the buffer pool. -@return TRUE if accessed */ +Determine the time of first access of a block in the buffer pool. +@return ut_time_ms() at the time of first access, 0 if not accessed */ UNIV_INLINE -ibool +unsigned buf_page_is_accessed( /*=================*/ const buf_page_t* bpage) /*!< in: control block */ - __attribute__((pure)); + __attribute__((nonnull, pure)); /*********************************************************************//** Flag a block accessed. */ UNIV_INLINE @@ -840,7 +831,8 @@ void buf_page_set_accessed( /*==================*/ buf_page_t* bpage, /*!< in/out: control block */ - ibool accessed); /*!< in: accessed */ + ulint time_ms) /*!< in: ut_time_ms() */ + __attribute__((nonnull)); /*********************************************************************//** Gets the buf_block_t handle of a buffered file block if an uncompressed page frame exists, or NULL. @@ -1026,14 +1018,6 @@ buf_block_hash_get( /*===============*/ ulint space, /*!< in: space id */ ulint offset);/*!< in: offset of the page within space */ -/*******************************************************************//** -Increments the pool clock by one and returns its new value. Remember that -in the 32 bit version the clock wraps around at 4 billion! -@return new clock value */ -UNIV_INLINE -ulint -buf_pool_clock_tic(void); -/*====================*/ /*********************************************************************//** Gets the current length of the free list of buffer blocks. @return length of the free list */ @@ -1073,16 +1057,10 @@ struct buf_page_struct{ flushed to disk, this tells the flush_type. @see enum buf_flush */ - unsigned accessed:1; /*!< TRUE if the page has been accessed - while in the buffer pool: read-ahead - may read in pages which have not been - accessed yet; a thread is allowed to - read this for heuristic purposes - without holding any mutex or latch */ unsigned io_fix:2; /*!< type of pending I/O operation; also protected by buf_pool_mutex @see enum buf_io_fix */ - unsigned buf_fix_count:24;/*!< count of how manyfold this block + unsigned buf_fix_count:25;/*!< count of how manyfold this block is currently bufferfixed */ /* @} */ #endif /* !UNIV_HOTBACKUP */ @@ -1112,7 +1090,16 @@ struct buf_page_struct{ - BUF_BLOCK_FILE_PAGE: flush_list - BUF_BLOCK_ZIP_DIRTY: flush_list - BUF_BLOCK_ZIP_PAGE: zip_clean - - BUF_BLOCK_ZIP_FREE: zip_free[] */ + - BUF_BLOCK_ZIP_FREE: zip_free[] + + The contents of the list node + is undefined if !in_flush_list + && state == BUF_BLOCK_FILE_PAGE, + or if state is one of + BUF_BLOCK_MEMORY, + BUF_BLOCK_REMOVE_HASH or + BUF_BLOCK_READY_IN_USE. */ + /* resplit for optimistic use */ UT_LIST_NODE_T(buf_page_t) free; UT_LIST_NODE_T(buf_page_t) flush_list; @@ -1155,18 +1142,8 @@ struct buf_page_struct{ debugging */ //#endif /* UNIV_DEBUG */ unsigned old:1; /*!< TRUE if the block is in the old - blocks in the LRU list */ - unsigned LRU_position:31;/*!< value which monotonically - decreases (or may stay - constant if old==TRUE) toward - the end of the LRU list, if - buf_pool->ulint_clock has not - wrapped around: NOTE that this - value can only be used in - heuristic algorithms, because - of the possibility of a - wrap-around! */ - unsigned freed_page_clock:32;/*!< the value of + blocks in buf_pool->LRU_old */ + unsigned freed_page_clock:31;/*!< the value of buf_pool->freed_page_clock when this block was the last time put to the head of the @@ -1174,6 +1151,9 @@ struct buf_page_struct{ to read this for heuristic purposes without holding any mutex or latch */ + unsigned access_time:32; /*!< time of first access, or + 0 if the block was never accessed + in the buffer pool */ /* @} */ # ifdef UNIV_DEBUG_FILE_ACCESSES ibool file_page_was_freed; @@ -1318,6 +1298,31 @@ Compute the hash fold value for blocks in buf_pool->zip_hash. */ #define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b)) /* @} */ +/** @brief The buffer pool statistics structure. */ +struct buf_pool_stat_struct{ + ulint n_page_gets; /*!< number of page gets performed; + also successful searches through + the adaptive hash index are + counted as page gets; this field + is NOT protected by the buffer + pool mutex */ + ulint n_pages_read; /*!< number read operations */ + ulint n_pages_written;/*!< number write operations */ + ulint n_pages_created;/*!< number of pages created + in the pool with no read */ + ulint n_ra_pages_read;/*!< number of pages read in + as part of read ahead */ + ulint n_ra_pages_evicted;/*!< number of read ahead + pages that are evicted without + being accessed */ + ulint n_pages_made_young; /*!< number of pages made young, in + calls to buf_LRU_make_block_young() */ + ulint n_pages_not_made_young; /*!< number of pages not made + young because the first access + was not long enough ago, in + buf_page_peek_if_too_old() */ +}; + /** @brief The buffer pool structure. NOTE! The definition appears here only for other modules of this @@ -1342,28 +1347,16 @@ struct buf_pool_struct{ ulint n_pend_reads; /*!< number of pending read operations */ ulint n_pend_unzip; /*!< number of pending decompressions */ - time_t last_printout_time; /*!< when buf_print was last time + time_t last_printout_time; + /*!< when buf_print_io was last time called */ - ulint n_pages_read; /*!< number read operations */ - ulint n_pages_written;/*!< number write operations */ - ulint n_pages_created;/*!< number of pages created - in the pool with no read */ - ulint n_page_gets; /*!< number of page gets performed; - also successful searches through - the adaptive hash index are - counted as page gets; this field - is NOT protected by the buffer - pool mutex */ - ulint n_page_gets_old;/*!< n_page_gets when buf_print was - last time called: used to calculate - hit rate */ - ulint n_pages_read_old;/*!< n_pages_read when buf_print was - last time called */ - ulint n_pages_written_old;/*!< number write operations */ - ulint n_pages_created_old;/*!< number of pages created in - the pool with no read */ + buf_pool_stat_t stat; /*!< current statistics */ + buf_pool_stat_t old_stat; /*!< old statistics */ + /* @} */ + /** @name Page flushing algorithm fields */ + /* @{ */ UT_LIST_BASE_NODE_T(buf_page_t) flush_list; @@ -1379,10 +1372,6 @@ struct buf_pool_struct{ /*!< this is in the set state when there is no flush batch of the given type running */ - ulint ulint_clock; /*!< a sequence number used to count - time. NOTE! This counter wraps - around at 4 billion (if ulint == - 32 bits)! */ ulint freed_page_clock;/*!< a sequence number used to count the number of buffer blocks removed from the end of @@ -1406,17 +1395,18 @@ struct buf_pool_struct{ block list */ UT_LIST_BASE_NODE_T(buf_page_t) LRU; /*!< base node of the LRU list */ - buf_page_t* LRU_old; /*!< pointer to the about 3/8 oldest - blocks in the LRU list; NULL if LRU - length less than BUF_LRU_OLD_MIN_LEN; + buf_page_t* LRU_old; /*!< pointer to the about + buf_LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV + oldest blocks in the LRU list; + NULL if LRU length less than + BUF_LRU_OLD_MIN_LEN; NOTE: when LRU_old != NULL, its length should always equal LRU_old_len */ ulint LRU_old_len; /*!< length of the LRU list from the block to which LRU_old points onward, including that block; see buf0lru.c for the restrictions - on this value; not defined if - LRU_old == NULL; + on this value; 0 if LRU_old == NULL; NOTE: LRU_old_len must be adjusted whenever LRU_old shrinks or grows! */ diff --git a/storage/xtradb/include/buf0buf.ic b/storage/xtradb/include/buf0buf.ic index 1a2b2556839..e48794fc753 100644 --- a/storage/xtradb/include/buf0buf.ic +++ b/storage/xtradb/include/buf0buf.ic @@ -72,9 +72,30 @@ buf_page_peek_if_too_old( /*=====================*/ const buf_page_t* bpage) /*!< in: block to make younger */ { - return(buf_pool->freed_page_clock - >= buf_page_get_freed_page_clock(bpage) - + 1 + (buf_pool->curr_size / 4)); + if (UNIV_UNLIKELY(buf_pool->freed_page_clock == 0)) { + /* If eviction has not started yet, do not update the + statistics or move blocks in the LRU list. This is + either the warm-up phase or an in-memory workload. */ + return(FALSE); + } else if (buf_LRU_old_threshold_ms && bpage->old) { + unsigned access_time = buf_page_is_accessed(bpage); + + if (access_time > 0 + && (ut_time_ms() - access_time) + >= buf_LRU_old_threshold_ms) { + return(TRUE); + } + + buf_pool->stat.n_pages_not_made_young++; + return(FALSE); + } else { + /* FIXME: bpage->freed_page_clock is 31 bits */ + return((buf_pool->freed_page_clock & ((1UL << 31) - 1)) + > ((ulint) bpage->freed_page_clock + + (buf_pool->curr_size + * (BUF_LRU_OLD_RATIO_DIV - buf_LRU_old_ratio) + / (BUF_LRU_OLD_RATIO_DIV * 4)))); + } } /*********************************************************************//** @@ -125,23 +146,6 @@ try_again: return(lsn); } - -/*******************************************************************//** -Increments the buf_pool clock by one and returns its new value. Remember -that in the 32 bit version the clock wraps around at 4 billion! -@return new clock value */ -UNIV_INLINE -ulint -buf_pool_clock_tic(void) -/*====================*/ -{ - //ut_ad(buf_pool_mutex_own()); - ut_ad(mutex_own(&LRU_list_mutex)); - - buf_pool->ulint_clock++; - - return(buf_pool->ulint_clock); -} #endif /* !UNIV_HOTBACKUP */ /*********************************************************************//** @@ -288,21 +292,6 @@ buf_page_belongs_to_unzip_LRU( } /*********************************************************************//** -Determine the approximate LRU list position of a block. -@return LRU list position */ -UNIV_INLINE -ulint -buf_page_get_LRU_position( -/*======================*/ - const buf_page_t* bpage) /*!< in: control block */ -{ - ut_ad(buf_page_in_file(bpage)); - //ut_ad(buf_pool_mutex_own()); /* This is used in optimistic */ - - return(bpage->LRU_position); -} - -/*********************************************************************//** Gets the mutex of a block. @return pointer to mutex protecting bpage */ UNIV_INLINE @@ -508,10 +497,19 @@ buf_page_set_old( ut_ad(bpage->in_LRU_list); #ifdef UNIV_LRU_DEBUG - if (UT_LIST_GET_PREV(LRU, bpage) && UT_LIST_GET_NEXT(LRU, bpage) - && UT_LIST_GET_PREV(LRU, bpage)->old - == UT_LIST_GET_NEXT(LRU, bpage)->old) { - ut_a(UT_LIST_GET_PREV(LRU, bpage)->old == old); + ut_a((buf_pool->LRU_old_len == 0) == (buf_pool->LRU_old == NULL)); + /* If a block is flagged "old", the LRU_old list must exist. */ + ut_a(!old || buf_pool->LRU_old); + + if (UT_LIST_GET_PREV(LRU, bpage) && UT_LIST_GET_NEXT(LRU, bpage)) { + const buf_page_t* prev = UT_LIST_GET_PREV(LRU, bpage); + const buf_page_t* next = UT_LIST_GET_NEXT(LRU, bpage); + if (prev->old == next->old) { + ut_a(prev->old == old); + } else { + ut_a(!prev->old); + ut_a(buf_pool->LRU_old == (old ? bpage : next)); + } } #endif /* UNIV_LRU_DEBUG */ @@ -519,17 +517,17 @@ buf_page_set_old( } /*********************************************************************//** -Determine if a block has been accessed in the buffer pool. -@return TRUE if accessed */ +Determine the time of first access of a block in the buffer pool. +@return ut_time_ms() at the time of first access, 0 if not accessed */ UNIV_INLINE -ibool +unsigned buf_page_is_accessed( /*=================*/ const buf_page_t* bpage) /*!< in: control block */ { ut_ad(buf_page_in_file(bpage)); - return(bpage->accessed); + return(bpage->access_time); } /*********************************************************************//** @@ -539,12 +537,16 @@ void buf_page_set_accessed( /*==================*/ buf_page_t* bpage, /*!< in/out: control block */ - ibool accessed) /*!< in: accessed */ + ulint time_ms) /*!< in: ut_time_ms() */ { ut_a(buf_page_in_file(bpage)); + //ut_ad(buf_pool_mutex_own()); ut_ad(mutex_own(buf_page_get_mutex(bpage))); - bpage->accessed = accessed; + if (!bpage->access_time) { + /* Make this the time of the first access. */ + bpage->access_time = time_ms; + } } /*********************************************************************//** @@ -825,15 +827,15 @@ buf_page_get_newest_modification( ib_uint64_t lsn; mutex_t* block_mutex = buf_page_get_mutex_enter(bpage); - ut_a(block_mutex); - - if (buf_page_in_file(bpage)) { + if (block_mutex && buf_page_in_file(bpage)) { lsn = bpage->newest_modification; } else { lsn = 0; } - mutex_exit(block_mutex); + if (block_mutex) { + mutex_exit(block_mutex); + } return(lsn); } diff --git a/storage/xtradb/include/buf0lru.h b/storage/xtradb/include/buf0lru.h index cde88824b64..0a7d01c95cf 100644 --- a/storage/xtradb/include/buf0lru.h +++ b/storage/xtradb/include/buf0lru.h @@ -69,7 +69,7 @@ These are low-level functions #########################################################################*/ /** Minimum LRU list length for which the LRU_old pointer is defined */ -#define BUF_LRU_OLD_MIN_LEN 80 +#define BUF_LRU_OLD_MIN_LEN 512 /* 8 megabytes of 16k pages */ /** Maximum LRU list search length in buf_flush_LRU_recommendation() */ #define BUF_LRU_FREE_SEARCH_LEN (5 + 2 * BUF_READ_AHEAD_AREA) @@ -84,15 +84,6 @@ void buf_LRU_invalidate_tablespace( /*==========================*/ ulint id); /*!< in: space id */ -/******************************************************************//** -Gets the minimum LRU_position field for the blocks in an initial segment -(determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not -guaranteed to be precise, because the ulint_clock may wrap around. -@return the limit; zero if could not determine it */ -UNIV_INTERN -ulint -buf_LRU_get_recent_limit(void); -/*==========================*/ /********************************************************************//** Insert a compressed block into buf_pool->zip_clean in the LRU order. */ UNIV_INTERN @@ -203,6 +194,18 @@ void buf_LRU_make_block_old( /*===================*/ buf_page_t* bpage); /*!< in: control block */ +/**********************************************************************//** +Updates buf_LRU_old_ratio. +@return updated old_pct */ +UNIV_INTERN +uint +buf_LRU_old_ratio_update( +/*=====================*/ + uint old_pct,/*!< in: Reserve this percentage of + the buffer pool for "old" blocks. */ + ibool adjust);/*!< in: TRUE=adjust the LRU list; + FALSE=just assign buf_LRU_old_ratio + during the initialization of InnoDB */ /********************************************************************//** Update the historical stats that we are collecting for LRU eviction policy at the end of each interval. */ @@ -210,6 +213,18 @@ UNIV_INTERN void buf_LRU_stat_update(void); /*=====================*/ +/********************************************************************//** +Dump the LRU page list to the specific file. */ +UNIV_INTERN +ibool +buf_LRU_file_dump(void); +/*===================*/ +/********************************************************************//** +Read the pages based on the specific file.*/ +UNIV_INTERN +ibool +buf_LRU_file_restore(void); +/*======================*/ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG /**********************************************************************//** @@ -229,6 +244,35 @@ buf_LRU_print(void); /*===============*/ #endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */ +/** @name Heuristics for detecting index scan @{ */ +/** Reserve this much/BUF_LRU_OLD_RATIO_DIV of the buffer pool for +"old" blocks. Protected by buf_pool_mutex. */ +extern uint buf_LRU_old_ratio; +/** The denominator of buf_LRU_old_ratio. */ +#define BUF_LRU_OLD_RATIO_DIV 1024 +/** Maximum value of buf_LRU_old_ratio. +@see buf_LRU_old_adjust_len +@see buf_LRU_old_ratio_update */ +#define BUF_LRU_OLD_RATIO_MAX BUF_LRU_OLD_RATIO_DIV +/** Minimum value of buf_LRU_old_ratio. +@see buf_LRU_old_adjust_len +@see buf_LRU_old_ratio_update +The minimum must exceed +(BUF_LRU_OLD_TOLERANCE + 5) * BUF_LRU_OLD_RATIO_DIV / BUF_LRU_OLD_MIN_LEN. */ +#define BUF_LRU_OLD_RATIO_MIN 51 + +#if BUF_LRU_OLD_RATIO_MIN >= BUF_LRU_OLD_RATIO_MAX +# error "BUF_LRU_OLD_RATIO_MIN >= BUF_LRU_OLD_RATIO_MAX" +#endif +#if BUF_LRU_OLD_RATIO_MAX > BUF_LRU_OLD_RATIO_DIV +# error "BUF_LRU_OLD_RATIO_MAX > BUF_LRU_OLD_RATIO_DIV" +#endif + +/** Move blocks to "new" LRU list only if the first access was at +least this many milliseconds ago. Not protected by any mutex or latch. */ +extern uint buf_LRU_old_threshold_ms; +/* @} */ + /** @brief Statistics for selecting the LRU list for eviction. These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O diff --git a/storage/xtradb/include/buf0rea.h b/storage/xtradb/include/buf0rea.h index b4d25e6fde0..71f62ff7b49 100644 --- a/storage/xtradb/include/buf0rea.h +++ b/storage/xtradb/include/buf0rea.h @@ -27,28 +27,59 @@ Created 11/5/1995 Heikki Tuuri #define buf0rea_h #include "univ.i" +#include "trx0types.h" #include "buf0types.h" /********************************************************************//** +Low-level function which reads a page asynchronously from a file to the +buffer buf_pool if it is not already there, in which case does nothing. +Sets the io_fix flag and sets an exclusive lock on the buffer frame. The +flag is cleared and the x-lock released by an i/o-handler thread. +@return 1 if a read request was queued, 0 if the page already resided +in buf_pool, or if the page is in the doublewrite buffer blocks in +which case it is never read into the pool, or if the tablespace does +not exist or is being dropped +@return 1 if read request is issued. 0 if it is not */ +UNIV_INTERN +ulint +buf_read_page_low( +/*==============*/ + ulint* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are + trying to read from a non-existent tablespace, or a + tablespace which is just now being dropped */ + ibool sync, /*!< in: TRUE if synchronous aio is desired */ + ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ..., + ORed to OS_AIO_SIMULATED_WAKE_LATER (see below + at read-ahead functions) */ + ulint space, /*!< in: space id */ + ulint zip_size,/*!< in: compressed page size, or 0 */ + ibool unzip, /*!< in: TRUE=request uncompressed page */ + ib_int64_t tablespace_version, /*!< in: if the space memory object has + this timestamp different from what we are giving here, + treat the tablespace as dropped; this is a timestamp we + use to stop dangling page reads from a tablespace + which we have DISCARDed + IMPORTed back */ + ulint offset, /*!< in: page number */ + trx_t* trx); +/********************************************************************//** High-level function which reads a page asynchronously from a file to the buffer buf_pool if it is not already there. Sets the io_fix flag and sets an exclusive lock on the buffer frame. The flag is cleared and the x-lock -released by the i/o-handler thread. Does a random read-ahead if it seems -sensible. -@return number of page read requests issued: this can be greater than -1 if read-ahead occurred */ +released by the i/o-handler thread. +@return TRUE if page has been read in, FALSE in case of failure */ UNIV_INTERN -ulint +ibool buf_read_page( /*==========*/ ulint space, /*!< in: space id */ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ - ulint offset);/*!< in: page number */ + ulint offset, /*!< in: page number */ + trx_t* trx); /********************************************************************//** Applies linear read-ahead if in the buf_pool the page is a border page of a linear read-ahead area and all the pages in the area have been accessed. Does not read any page if the read-ahead mechanism is not activated. Note -that the the algorithm looks at the 'natural' adjacent successor and +that the algorithm looks at the 'natural' adjacent successor and predecessor of the page, which on the leaf level of a B-tree are the next and previous page in the chain of leaves. To know these, the page specified in (space, offset) must already be present in the buf_pool. Thus, the @@ -74,8 +105,9 @@ buf_read_ahead_linear( /*==================*/ ulint space, /*!< in: space id */ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ - ulint offset);/*!< in: page number of a page; NOTE: the current thread + ulint offset, /*!< in: page number of a page; NOTE: the current thread must want access to this page (see NOTE 3 above) */ + trx_t* trx); /********************************************************************//** Issues read requests for pages which the ibuf module wants to read in, in order to contract the insert buffer tree. Technically, this function is like diff --git a/storage/xtradb/include/buf0types.h b/storage/xtradb/include/buf0types.h index e7167d716a0..bfae6477135 100644 --- a/storage/xtradb/include/buf0types.h +++ b/storage/xtradb/include/buf0types.h @@ -34,6 +34,8 @@ typedef struct buf_block_struct buf_block_t; typedef struct buf_chunk_struct buf_chunk_t; /** Buffer pool comprising buf_chunk_t */ typedef struct buf_pool_struct buf_pool_t; +/** Buffer pool statistics struct */ +typedef struct buf_pool_stat_struct buf_pool_stat_t; /** A buffer frame. @see page_t */ typedef byte buf_frame_t; diff --git a/storage/xtradb/include/db0err.h b/storage/xtradb/include/db0err.h index 23898583b72..747e9b5364e 100644 --- a/storage/xtradb/include/db0err.h +++ b/storage/xtradb/include/db0err.h @@ -32,6 +32,7 @@ enum db_err { /* The following are error codes */ DB_ERROR, + DB_INTERRUPTED, DB_OUT_OF_MEMORY, DB_OUT_OF_FILE_SPACE, DB_LOCK_WAIT, diff --git a/storage/xtradb/include/dict0crea.h b/storage/xtradb/include/dict0crea.h index 3107d771d88..cce1246b789 100644 --- a/storage/xtradb/include/dict0crea.h +++ b/storage/xtradb/include/dict0crea.h @@ -110,7 +110,7 @@ dict_create_or_check_foreign_constraint_tables(void); Adds foreign key definitions to data dictionary tables in the database. We look at table->foreign_list, and also generate names to constraints that were not named by the user. A generated constraint has a name of the format -databasename/tablename_ibfk_<number>, where the numbers start from 1, and are +databasename/tablename_ibfk_NUMBER, where the numbers start from 1, and are given locally for this table, that is, the number is not global, as in the old format constraints < 4.0.18 it used to be. @return error code or DB_SUCCESS */ diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h index 0842e307cda..3d49b6a98a4 100644 --- a/storage/xtradb/include/dict0dict.h +++ b/storage/xtradb/include/dict0dict.h @@ -712,7 +712,7 @@ dict_index_find_on_id_low( dulint id); /*!< in: index id */ /**********************************************************************//** Adds an index to the dictionary cache. -@return DB_SUCCESS or error code */ +@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */ UNIV_INTERN ulint dict_index_add_to_cache( @@ -1157,6 +1157,13 @@ void dict_ind_init(void); /*===============*/ +/**********************************************************************//** +Closes the data dictionary module. */ +UNIV_INTERN +void +dict_close(void); +/*============*/ + #ifndef UNIV_NONINL #include "dict0dict.ic" #endif diff --git a/storage/xtradb/include/dict0mem.h b/storage/xtradb/include/dict0mem.h index 1ee906fbf57..2d001111938 100644 --- a/storage/xtradb/include/dict0mem.h +++ b/storage/xtradb/include/dict0mem.h @@ -317,7 +317,7 @@ struct dict_foreign_struct{ char* id; /*!< id of the constraint as a null-terminated string */ unsigned n_fields:10; /*!< number of indexes' first fields - for which the the foreign key + for which the foreign key constraint is defined: we allow the indexes to contain more fields than mentioned in the constraint, as long diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h index 0470d533dec..805948f5b5d 100644 --- a/storage/xtradb/include/fil0fil.h +++ b/storage/xtradb/include/fil0fil.h @@ -224,15 +224,6 @@ fil_space_create( 0 for uncompressed tablespaces */ ulint purpose);/*!< in: FIL_TABLESPACE, or FIL_LOG if log */ /*******************************************************************//** -Frees a space object from a the tablespace memory cache. Closes the files in -the chain but does not delete them. -@return TRUE if success */ -UNIV_INTERN -ibool -fil_space_free( -/*===========*/ - ulint id); /*!< in: space id */ -/*******************************************************************//** Returns the size of the space in pages. The tablespace must be cached in the memory cache. @return space size, 0 if space not found */ @@ -278,6 +269,12 @@ fil_init( ulint hash_size, /*!< in: hash table size */ ulint max_n_open); /*!< in: max number of open files */ /*******************************************************************//** +Initializes the tablespace memory cache. */ +UNIV_INTERN +void +fil_close(void); +/*===========*/ +/*******************************************************************//** Opens all log files and system tablespace data files. They stay open until the database server shutdown. This should be called at a server startup after the space objects for the log and the system tablespace have been created. The @@ -614,9 +611,12 @@ fil_space_get_n_reserved_extents( Reads or writes data. This operation is asynchronous (aio). @return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do i/o on a tablespace which does not exist */ +#define fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message) \ + _fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message, NULL) + UNIV_INTERN ulint -fil_io( +_fil_io( /*===*/ ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE, ORed to OS_FILE_LOG, if a log i/o @@ -641,8 +641,25 @@ fil_io( void* buf, /*!< in/out: buffer where to store read data or from where to write; in aio this must be appropriately aligned */ - void* message); /*!< in: message for aio handler if non-sync + void* message, /*!< in: message for aio handler if non-sync aio used, else ignored */ + trx_t* trx); +/********************************************************************//** +Confirm whether the parameters are valid or not */ +UNIV_INTERN +ibool +fil_area_is_exist( +/*==============*/ + ulint space_id, /*!< in: space id */ + ulint zip_size, /*!< in: compressed page size in bytes; + 0 for uncompressed pages */ + ulint block_offset, /*!< in: offset in number of blocks */ + ulint byte_offset, /*!< in: remainder of offset in bytes; in + aio this must be divisible by the OS block + size */ + ulint len); /*!< in: how many bytes to read or write; this + must not cross a file boundary; in aio this + must be a block size multiple */ /**********************************************************************//** Waits for an aio operation to complete. This function is used to write the handler for completed requests. The aio array of pending requests is divided diff --git a/storage/xtradb/include/fsp0fsp.h b/storage/xtradb/include/fsp0fsp.h index 5f7dc58eedc..7abd3914eda 100644 --- a/storage/xtradb/include/fsp0fsp.h +++ b/storage/xtradb/include/fsp0fsp.h @@ -42,7 +42,7 @@ fsp_init(void); /*==========*/ /**********************************************************************//** Gets the current free limit of the system tablespace. The free limit -means the place of the first page which has never been put to the the +means the place of the first page which has never been put to the free list for allocation. The space above that address is initialized to zero. Sets also the global variable log_fsp_current_free_limit. @return free limit in megabytes */ diff --git a/storage/xtradb/include/ibuf0ibuf.h b/storage/xtradb/include/ibuf0ibuf.h index 21330997df3..8aa21fb9d95 100644 --- a/storage/xtradb/include/ibuf0ibuf.h +++ b/storage/xtradb/include/ibuf0ibuf.h @@ -356,6 +356,12 @@ void ibuf_print( /*=======*/ FILE* file); /*!< in: file where to print */ +/******************************************************************//** +Closes insert buffer and frees the data structures. */ +UNIV_INTERN +void +ibuf_close(void); +/*============*/ #define IBUF_HEADER_PAGE_NO FSP_IBUF_HEADER_PAGE_NO #define IBUF_TREE_ROOT_PAGE_NO FSP_IBUF_TREE_ROOT_PAGE_NO diff --git a/storage/xtradb/include/lock0lock.h b/storage/xtradb/include/lock0lock.h index fa5db831d4f..82e4c9bd976 100644 --- a/storage/xtradb/include/lock0lock.h +++ b/storage/xtradb/include/lock0lock.h @@ -59,6 +59,12 @@ lock_sys_create( /*============*/ ulint n_cells); /*!< in: number of slots in lock hash table */ /*********************************************************************//** +Closes the lock system at database shutdown. */ +UNIV_INTERN +void +lock_sys_close(void); +/*================*/ +/*********************************************************************//** Checks if some transaction has an implicit x-lock on a record in a clustered index. @return transaction which has the x-lock, or NULL */ @@ -630,6 +636,14 @@ lock_number_of_rows_locked( /*=======================*/ trx_t* trx); /*!< in: transaction */ /*******************************************************************//** +Check if a transaction holds any autoinc locks. +@return TRUE if the transaction holds any AUTOINC locks. */ +UNIV_INTERN +ibool +lock_trx_holds_autoinc_locks( +/*=========================*/ + const trx_t* trx); /*!< in: transaction */ +/*******************************************************************//** Release all the transaction's autoinc locks. */ UNIV_INTERN void diff --git a/storage/xtradb/include/log0log.h b/storage/xtradb/include/log0log.h index 059f548a085..135aeb69e2d 100644 --- a/storage/xtradb/include/log0log.h +++ b/storage/xtradb/include/log0log.h @@ -118,10 +118,9 @@ UNIV_INLINE ib_uint64_t log_reserve_and_write_fast( /*=======================*/ - byte* str, /*!< in: string */ + const void* str, /*!< in: string */ ulint len, /*!< in: string length */ - ib_uint64_t* start_lsn,/*!< out: start lsn of the log record */ - ibool* success);/*!< out: TRUE if success */ + ib_uint64_t* start_lsn);/*!< out: start lsn of the log record */ /***********************************************************************//** Releases the log mutex. */ UNIV_INLINE @@ -283,7 +282,7 @@ log_make_checkpoint_at( later lsn, if IB_ULONGLONG_MAX, makes a checkpoint at the latest lsn */ ibool write_always); /*!< in: the function normally checks if - the the new checkpoint would have a + the new checkpoint would have a greater lsn than the previous one: if not, then no physical write is done; by setting this parameter TRUE, a @@ -573,6 +572,18 @@ UNIV_INTERN void log_refresh_stats(void); /*===================*/ +/********************************************************** +Shutdown the log system but do not release all the memory. */ +UNIV_INTERN +void +log_shutdown(void); +/*==============*/ +/********************************************************** +Free the log system data structures. */ +UNIV_INTERN +void +log_mem_free(void); +/*==============*/ extern log_t* log_sys; @@ -585,7 +596,7 @@ extern log_t* log_sys; #define LOG_RECOVER 98887331 /* The counting of lsn's starts from this value: this must be non-zero */ -#define LOG_START_LSN ((ib_uint64_t) (16 * OS_FILE_LOG_BLOCK_SIZE)) +#define LOG_START_LSN ((ib_uint64_t) (16 * OS_FILE_LOG_BLOCK_SIZE)) #define LOG_BUFFER_SIZE (srv_log_buffer_size * UNIV_PAGE_SIZE) #define LOG_ARCHIVE_BUF_SIZE (srv_log_buffer_size * UNIV_PAGE_SIZE / 4) @@ -722,9 +733,12 @@ struct log_group_struct{ ulint lsn_offset; /*!< the offset of the above lsn */ ulint n_pending_writes;/*!< number of currently pending flush writes for this log group */ + byte** file_header_bufs_ptr;/*!< unaligned buffers */ byte** file_header_bufs;/*!< buffers for each file header in the group */ +#ifdef UNIV_LOG_ARCHIVE /*-----------------------------*/ + byte** archive_file_header_bufs_ptr;/*!< unaligned buffers */ byte** archive_file_header_bufs;/*!< buffers for each file header in the group */ ulint archive_space_id;/*!< file space which @@ -743,10 +757,12 @@ struct log_group_struct{ completion function then sets the new value to ..._file_no */ ulint next_archived_offset; /*!< like the preceding field */ +#endif /* UNIV_LOG_ARCHIVE */ /*-----------------------------*/ ib_uint64_t scanned_lsn; /*!< used only in recovery: recovery scan succeeded up to this lsn in this log group */ + byte* checkpoint_buf_ptr;/*!< unaligned checkpoint header */ byte* checkpoint_buf; /*!< checkpoint header is written from this buffer to the group */ UT_LIST_NODE_T(log_group_t) @@ -764,6 +780,7 @@ struct log_struct{ #ifndef UNIV_HOTBACKUP mutex_t mutex; /*!< mutex protecting the log */ #endif /* !UNIV_HOTBACKUP */ + byte* buf_ptr; /* unaligned log buffer */ byte* buf; /*!< log buffer */ ulint buf_size; /*!< log buffer size in bytes */ ulint max_buf_free; /*!< recommended maximum value of @@ -900,6 +917,7 @@ struct log_struct{ should wait for this without owning the log mutex */ #endif /* !UNIV_HOTBACKUP */ + byte* checkpoint_buf_ptr;/* unaligned checkpoint header */ byte* checkpoint_buf; /*!< checkpoint header is read to this buffer */ /* @} */ diff --git a/storage/xtradb/include/log0log.ic b/storage/xtradb/include/log0log.ic index d071985982a..36d151a3064 100644 --- a/storage/xtradb/include/log0log.ic +++ b/storage/xtradb/include/log0log.ic @@ -27,6 +27,7 @@ Created 12/9/1995 Heikki Tuuri #include "mach0data.h" #include "mtr0mtr.h" +#ifdef UNIV_LOG_DEBUG /******************************************************//** Checks by parsing that the catenated log segment for a single mtr is consistent. */ @@ -34,11 +35,12 @@ UNIV_INTERN ibool log_check_log_recs( /*===============*/ - byte* buf, /*!< in: pointer to the start of + const byte* buf, /*!< in: pointer to the start of the log segment in the log_sys->buf log buffer */ ulint len, /*!< in: segment length in bytes */ ib_uint64_t buf_start_lsn); /*!< in: buffer start lsn */ +#endif /* UNIV_LOG_DEBUG */ /************************************************************//** Gets a log block flush bit. @@ -305,55 +307,76 @@ UNIV_INLINE ib_uint64_t log_reserve_and_write_fast( /*=======================*/ - byte* str, /*!< in: string */ + const void* str, /*!< in: string */ ulint len, /*!< in: string length */ - ib_uint64_t* start_lsn,/*!< out: start lsn of the log record */ - ibool* success)/*!< out: TRUE if success */ + ib_uint64_t* start_lsn)/*!< out: start lsn of the log record */ { - log_t* log = log_sys; ulint data_len; - ib_uint64_t lsn; +#ifdef UNIV_LOG_LSN_DEBUG + /* length of the LSN pseudo-record */ + ulint lsn_len = 1 + + mach_get_compressed_size(log_sys->lsn >> 32) + + mach_get_compressed_size(log_sys->lsn & 0xFFFFFFFFUL); +#endif /* UNIV_LOG_LSN_DEBUG */ - *success = TRUE; + mutex_enter(&log_sys->mutex); - mutex_enter(&(log->mutex)); - - data_len = len + log->buf_free % OS_FILE_LOG_BLOCK_SIZE; + data_len = len +#ifdef UNIV_LOG_LSN_DEBUG + + lsn_len +#endif /* UNIV_LOG_LSN_DEBUG */ + + log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE; if (data_len >= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) { /* The string does not fit within the current log block or the log block would become full */ - *success = FALSE; - - mutex_exit(&(log->mutex)); + mutex_exit(&log_sys->mutex); return(0); } - *start_lsn = log->lsn; - - ut_memcpy(log->buf + log->buf_free, str, len); + *start_lsn = log_sys->lsn; + +#ifdef UNIV_LOG_LSN_DEBUG + { + /* Write the LSN pseudo-record. */ + byte* b = &log_sys->buf[log_sys->buf_free]; + *b++ = MLOG_LSN | (MLOG_SINGLE_REC_FLAG & *(const byte*) str); + /* Write the LSN in two parts, + as a pseudo page number and space id. */ + b += mach_write_compressed(b, log_sys->lsn >> 32); + b += mach_write_compressed(b, log_sys->lsn & 0xFFFFFFFFUL); + ut_a(b - lsn_len == &log_sys->buf[log_sys->buf_free]); + + memcpy(b, str, len); + len += lsn_len; + } +#else /* UNIV_LOG_LSN_DEBUG */ + memcpy(log_sys->buf + log_sys->buf_free, str, len); +#endif /* UNIV_LOG_LSN_DEBUG */ - log_block_set_data_len((byte*) ut_align_down(log->buf + log->buf_free, + log_block_set_data_len((byte*) ut_align_down(log_sys->buf + + log_sys->buf_free, OS_FILE_LOG_BLOCK_SIZE), data_len); #ifdef UNIV_LOG_DEBUG - log->old_buf_free = log->buf_free; - log->old_lsn = log->lsn; + log_sys->old_buf_free = log_sys->buf_free; + log_sys->old_lsn = log_sys->lsn; #endif - log->buf_free += len; + log_sys->buf_free += len; - ut_ad(log->buf_free <= log->buf_size); + ut_ad(log_sys->buf_free <= log_sys->buf_size); - lsn = log->lsn += len; + log_sys->lsn += len; #ifdef UNIV_LOG_DEBUG - log_check_log_recs(log->buf + log->old_buf_free, - log->buf_free - log->old_buf_free, log->old_lsn); + log_check_log_recs(log_sys->buf + log_sys->old_buf_free, + log_sys->buf_free - log_sys->old_buf_free, + log_sys->old_lsn); #endif - return(lsn); + return(log_sys->lsn); } /***********************************************************************//** diff --git a/storage/xtradb/include/log0recv.h b/storage/xtradb/include/log0recv.h index 8468c213bdb..ac6b19a3f6a 100644 --- a/storage/xtradb/include/log0recv.h +++ b/storage/xtradb/include/log0recv.h @@ -239,6 +239,18 @@ UNIV_INTERN void recv_sys_create(void); /*=================*/ +/**********************************************************//** +Release recovery system mutexes. */ +UNIV_INTERN +void +recv_sys_close(void); +/*================*/ +/********************************************************//** +Frees the recovery system memory. */ +UNIV_INTERN +void +recv_sys_mem_free(void); +/*===================*/ /********************************************************//** Inits the recovery system for a recovery operation. */ UNIV_INTERN @@ -246,6 +258,12 @@ void recv_sys_init( /*==========*/ ulint available_memory); /*!< in: available memory in bytes */ +/********************************************************//** +Reset the state of the recovery system variables. */ +UNIV_INTERN +void +recv_sys_var_init(void); +/*===================*/ /*******************************************************************//** Empties the hash table of stored log records, applying them to appropriate pages. */ @@ -412,6 +430,39 @@ struct recv_sys_struct{ hash_table_t* addr_hash;/*!< hash table of file addresses of pages */ ulint n_addrs;/*!< number of not processed hashed file addresses in the hash table */ + +/* If you modified the following defines at original file, + You should also modify them. */ +/* defined in os0file.c */ +#define OS_AIO_MERGE_N_CONSECUTIVE 64 +/* defined in log0recv.c */ +#define RECV_READ_AHEAD_AREA 32 + time_t stats_recv_start_time; + ulint stats_recv_turns; + + ulint stats_read_requested_pages; + ulint stats_read_in_area[RECV_READ_AHEAD_AREA]; + + ulint stats_read_io_pages; + ulint stats_read_io_consecutive[OS_AIO_MERGE_N_CONSECUTIVE]; + ulint stats_write_io_pages; + ulint stats_write_io_consecutive[OS_AIO_MERGE_N_CONSECUTIVE]; + + ulint stats_doublewrite_check_pages; + ulint stats_doublewrite_overwrite_pages; + + ulint stats_recover_pages_with_read; + ulint stats_recover_pages_without_read; + + ulint stats_log_recs; + ulint stats_log_len_sum; + + ulint stats_applied_log_recs; + ulint stats_applied_log_len_sum; + ulint stats_pages_already_new; + + ib_uint64_t stats_oldest_modified_lsn; + ib_uint64_t stats_newest_modified_lsn; }; /** The recovery system */ @@ -433,6 +484,11 @@ are allowed yet: the variable name is misleading. */ extern ibool recv_no_ibuf_operations; /** TRUE when recv_init_crash_recovery() has been called. */ extern ibool recv_needed_recovery; +#ifdef UNIV_DEBUG +/** TRUE if writing to the redo log (mtr_commit) is forbidden. +Protected by log_sys->mutex. */ +extern ibool recv_no_log_write; +#endif /* UNIV_DEBUG */ /** TRUE if buf_page_is_corrupted() should check if the log sequence number (FIL_PAGE_LSN) is in the future. Initially FALSE, and set by diff --git a/storage/xtradb/include/mem0mem.h b/storage/xtradb/include/mem0mem.h index a092b024219..98f8748e529 100644 --- a/storage/xtradb/include/mem0mem.h +++ b/storage/xtradb/include/mem0mem.h @@ -82,6 +82,13 @@ void mem_init( /*=====*/ ulint size); /*!< in: common pool size in bytes */ +/******************************************************************//** +Closes the memory system. */ +UNIV_INTERN +void +mem_close(void); +/*===========*/ + /**************************************************************//** Use this macro instead of the corresponding function! Macro for memory heap creation. */ diff --git a/storage/xtradb/include/mem0pool.h b/storage/xtradb/include/mem0pool.h index 18f988241d6..5e93bf88a47 100644 --- a/storage/xtradb/include/mem0pool.h +++ b/storage/xtradb/include/mem0pool.h @@ -62,6 +62,13 @@ mem_pool_create( /*============*/ ulint size); /*!< in: pool size in bytes */ /********************************************************************//** +Frees a memory pool. */ +UNIV_INTERN +void +mem_pool_free( +/*==========*/ + mem_pool_t* pool); /*!< in, own: memory pool */ +/********************************************************************//** Allocates memory from a pool. NOTE: This low-level function should only be used in mem0mem.*! @return own: allocated memory buffer */ diff --git a/storage/xtradb/include/mtr0mtr.h b/storage/xtradb/include/mtr0mtr.h index 69a2c03f4cb..bc3f1951be9 100644 --- a/storage/xtradb/include/mtr0mtr.h +++ b/storage/xtradb/include/mtr0mtr.h @@ -106,6 +106,9 @@ For 1 - 8 bytes, the flag value must give the length also! @{ */ #define MLOG_IBUF_BITMAP_INIT ((byte)27) /*!< initialize an ibuf bitmap page */ /*#define MLOG_FULL_PAGE ((byte)28) full contents of a page */ +#ifdef UNIV_LOG_LSN_DEBUG +# define MLOG_LSN ((byte)28) /* current LSN */ +#endif #define MLOG_INIT_FILE_PAGE ((byte)29) /*!< this means that a file page is taken into use and the prior @@ -118,7 +121,7 @@ For 1 - 8 bytes, the flag value must give the length also! @{ */ #define MLOG_WRITE_STRING ((byte)30) /*!< write a string to a page */ #define MLOG_MULTI_REC_END ((byte)31) /*!< if a single mtr writes - log records for several pages, + several log records, this log record ends the sequence of these records */ #define MLOG_DUMMY_RECORD ((byte)32) /*!< dummy log record used to diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h index d8d2f0e5d9e..3eff5216867 100644 --- a/storage/xtradb/include/os0file.h +++ b/storage/xtradb/include/os0file.h @@ -53,6 +53,7 @@ Created 10/21/1995 Heikki Tuuri #define os0file_h #include "univ.i" +#include "trx0types.h" #ifndef __WIN__ #include <dirent.h> @@ -157,6 +158,8 @@ log. */ to become available again */ #define OS_FILE_SHARING_VIOLATION 76 #define OS_FILE_ERROR_NOT_SPECIFIED 77 +#define OS_FILE_INSUFFICIENT_RESOURCE 78 +#define OS_FILE_OPERATION_ABORTED 79 /* @} */ /** Types for aio operations @{ */ @@ -497,9 +500,12 @@ os_file_get_last_error( /*******************************************************************//** Requests a synchronous read operation. @return TRUE if request was successful, FALSE if fail */ +#define os_file_read(file, buf, offset, offset_high, n) \ + _os_file_read(file, buf, offset, offset_high, n, NULL) + UNIV_INTERN ibool -os_file_read( +_os_file_read( /*=========*/ os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ @@ -507,7 +513,8 @@ os_file_read( offset where to read */ ulint offset_high,/*!< in: most significant 32 bits of offset */ - ulint n); /*!< in: number of bytes to read */ + ulint n, /*!< in: number of bytes to read */ + trx_t* trx); /*******************************************************************//** Rewind file to its start, read at most size - 1 bytes from it to str, and NUL-terminate str. All errors are silently ignored. This function is @@ -619,6 +626,13 @@ os_aio_init( ulint n_write_segs, /*<! in: number of writer threads */ ulint n_slots_sync); /*<! in: number of slots in the sync aio array */ +/*********************************************************************** +Frees the asynchronous io system. */ +UNIV_INTERN +void +os_aio_free(void); +/*=============*/ + /*******************************************************************//** Requests an asynchronous i/o operation. @return TRUE if request was queued successfully, FALSE if fail */ @@ -654,10 +668,11 @@ os_aio( (can be used to identify a completed aio operation); ignored if mode is OS_AIO_SYNC */ - void* message2);/*!< in: message for the aio handler + void* message2,/*!< in: message for the aio handler (can be used to identify a completed aio operation); ignored if mode is OS_AIO_SYNC */ + trx_t* trx); /************************************************************************//** Wakes up all async i/o threads so that they know to exit themselves in shutdown. */ diff --git a/storage/xtradb/include/os0sync.h b/storage/xtradb/include/os0sync.h index 0e0b32e7036..0c22162b900 100644 --- a/storage/xtradb/include/os0sync.h +++ b/storage/xtradb/include/os0sync.h @@ -285,44 +285,74 @@ os_fast_mutex_free( /**********************************************************//** Atomic compare-and-swap and increment for InnoDB. */ -#ifdef HAVE_GCC_ATOMIC_BUILTINS +#if defined(HAVE_IB_GCC_ATOMIC_BUILTINS) + +#define HAVE_ATOMIC_BUILTINS + /**********************************************************//** Returns true if swapped, ptr is pointer to target, old_val is value to compare to, new_val is the value to swap in. */ + # define os_compare_and_swap(ptr, old_val, new_val) \ __sync_bool_compare_and_swap(ptr, old_val, new_val) + # define os_compare_and_swap_ulint(ptr, old_val, new_val) \ os_compare_and_swap(ptr, old_val, new_val) + # define os_compare_and_swap_lint(ptr, old_val, new_val) \ os_compare_and_swap(ptr, old_val, new_val) -# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \ + +# ifdef HAVE_IB_ATOMIC_PTHREAD_T_GCC +# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \ os_compare_and_swap(ptr, old_val, new_val) +# define INNODB_RW_LOCKS_USE_ATOMICS +# define IB_ATOMICS_STARTUP_MSG \ + "Mutexes and rw_locks use GCC atomic builtins" +# else /* HAVE_IB_ATOMIC_PTHREAD_T_GCC */ +# define IB_ATOMICS_STARTUP_MSG \ + "Mutexes use GCC atomic builtins, rw_locks do not" +# endif /* HAVE_IB_ATOMIC_PTHREAD_T_GCC */ + /**********************************************************//** Returns the resulting value, ptr is pointer to target, amount is the amount of increment. */ + # define os_atomic_increment(ptr, amount) \ __sync_add_and_fetch(ptr, amount) + # define os_atomic_increment_lint(ptr, amount) \ os_atomic_increment(ptr, amount) + # define os_atomic_increment_ulint(ptr, amount) \ os_atomic_increment(ptr, amount) + /**********************************************************//** Returns the old value of *ptr, atomically sets *ptr to new_val */ + # define os_atomic_test_and_set_byte(ptr, new_val) \ __sync_lock_test_and_set(ptr, new_val) + +#elif defined(HAVE_IB_SOLARIS_ATOMICS) + +#define HAVE_ATOMIC_BUILTINS + /* If not compiling with GCC or GCC doesn't support the atomic intrinsics and running on Solaris >= 10 use Solaris atomics */ -#elif defined(HAVE_SOLARIS_ATOMICS) + #include <atomic.h> + /**********************************************************//** Returns true if swapped, ptr is pointer to target, old_val is value to compare to, new_val is the value to swap in. */ + # define os_compare_and_swap_ulint(ptr, old_val, new_val) \ (atomic_cas_ulong(ptr, old_val, new_val) == old_val) + # define os_compare_and_swap_lint(ptr, old_val, new_val) \ ((lint)atomic_cas_ulong((ulong_t*) ptr, old_val, new_val) == old_val) -# ifdef INNODB_RW_LOCKS_USE_ATOMICS -# if SIZEOF_PTHREAD_T == 4 + +# ifdef HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS +# if SIZEOF_PTHREAD_T == 4 # define os_compare_and_swap_thread_id(ptr, old_val, new_val) \ ((pthread_t)atomic_cas_32(ptr, old_val, new_val) == old_val) # elif SIZEOF_PTHREAD_T == 8 @@ -331,21 +361,35 @@ compare to, new_val is the value to swap in. */ # else # error "SIZEOF_PTHREAD_T != 4 or 8" # endif /* SIZEOF_PTHREAD_T CHECK */ -# endif /* INNODB_RW_LOCKS_USE_ATOMICS */ +# define INNODB_RW_LOCKS_USE_ATOMICS +# define IB_ATOMICS_STARTUP_MSG \ + "Mutexes and rw_locks use Solaris atomic functions" +# else /* HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS */ +# define IB_ATOMICS_STARTUP_MSG \ + "Mutexes use Solaris atomic functions, rw_locks do not" +# endif /* HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS */ /**********************************************************//** Returns the resulting value, ptr is pointer to target, amount is the amount of increment. */ + # define os_atomic_increment_lint(ptr, amount) \ atomic_add_long_nv((ulong_t*) ptr, amount) + # define os_atomic_increment_ulint(ptr, amount) \ atomic_add_long_nv(ptr, amount) + /**********************************************************//** Returns the old value of *ptr, atomically sets *ptr to new_val */ + # define os_atomic_test_and_set_byte(ptr, new_val) \ atomic_swap_uchar(ptr, new_val) -/* On Windows, use Windows atomics / interlocked */ + #elif defined(HAVE_WINDOWS_ATOMICS) + +#define HAVE_ATOMIC_BUILTINS + +/* On Windows, use Windows atomics / interlocked */ # ifdef _WIN64 # define win_cmp_and_xchg InterlockedCompareExchange64 # define win_xchg_and_add InterlockedExchangeAdd64 @@ -353,31 +397,46 @@ Returns the old value of *ptr, atomically sets *ptr to new_val */ # define win_cmp_and_xchg InterlockedCompareExchange # define win_xchg_and_add InterlockedExchangeAdd # endif + /**********************************************************//** Returns true if swapped, ptr is pointer to target, old_val is value to compare to, new_val is the value to swap in. */ + # define os_compare_and_swap_ulint(ptr, old_val, new_val) \ (win_cmp_and_xchg(ptr, new_val, old_val) == old_val) + # define os_compare_and_swap_lint(ptr, old_val, new_val) \ (win_cmp_and_xchg(ptr, new_val, old_val) == old_val) -# ifdef INNODB_RW_LOCKS_USE_ATOMICS -# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \ + +/* windows thread objects can always be passed to windows atomic functions */ +# define os_compare_and_swap_thread_id(ptr, old_val, new_val) \ (InterlockedCompareExchange(ptr, new_val, old_val) == old_val) -# endif /* INNODB_RW_LOCKS_USE_ATOMICS */ +# define INNODB_RW_LOCKS_USE_ATOMICS +# define IB_ATOMICS_STARTUP_MSG \ + "Mutexes and rw_locks use Windows interlocked functions" + /**********************************************************//** Returns the resulting value, ptr is pointer to target, amount is the amount of increment. */ + # define os_atomic_increment_lint(ptr, amount) \ (win_xchg_and_add(ptr, amount) + amount) + # define os_atomic_increment_ulint(ptr, amount) \ ((ulint) (win_xchg_and_add(ptr, amount) + amount)) + /**********************************************************//** Returns the old value of *ptr, atomically sets *ptr to new_val. InterlockedExchange() operates on LONG, and the LONG will be clobbered */ + # define os_atomic_test_and_set_byte(ptr, new_val) \ ((byte) InterlockedExchange(ptr, new_val)) -#endif /* HAVE_GCC_ATOMIC_BUILTINS */ + +#else +# define IB_ATOMICS_STARTUP_MSG \ + "Mutexes and rw_locks use InnoDB's own implementation" +#endif #ifndef UNIV_NONINL #include "os0sync.ic" diff --git a/storage/xtradb/include/page0page.h b/storage/xtradb/include/page0page.h index a4fe069d022..3899499fb6a 100644 --- a/storage/xtradb/include/page0page.h +++ b/storage/xtradb/include/page0page.h @@ -76,8 +76,11 @@ typedef byte page_header_t; header which are set in a page create */ /*----*/ #define PAGE_LEVEL 26 /* level of the node in an index tree; the - leaf level is the level 0 */ -#define PAGE_INDEX_ID 28 /* index id where the page belongs */ + leaf level is the level 0. This field should + not be written to after page creation. */ +#define PAGE_INDEX_ID 28 /* index id where the page belongs. + This field should not be written to after + page creation. */ #define PAGE_BTR_SEG_LEAF 36 /* file segment header for the leaf pages in a B-tree: defined only on the root page of a B-tree, but not in the root of an ibuf tree */ diff --git a/storage/xtradb/include/page0page.ic b/storage/xtradb/include/page0page.ic index 318ec1cc1f2..8f794410f20 100644 --- a/storage/xtradb/include/page0page.ic +++ b/storage/xtradb/include/page0page.ic @@ -907,7 +907,7 @@ page_get_data_size( /************************************************************//** Allocates a block of memory from the free list of an index page. */ -UNIV_INTERN +UNIV_INLINE void page_mem_alloc_free( /*================*/ diff --git a/storage/xtradb/include/page0zip.h b/storage/xtradb/include/page0zip.h index 9aaa066306b..574809e5227 100644 --- a/storage/xtradb/include/page0zip.h +++ b/storage/xtradb/include/page0zip.h @@ -127,8 +127,12 @@ page_zip_decompress( /*================*/ page_zip_des_t* page_zip,/*!< in: data, ssize; out: m_start, m_end, m_nonempty, n_blobs */ - page_t* page) /*!< out: uncompressed page, may be trashed */ - __attribute__((nonnull)); + page_t* page, /*!< out: uncompressed page, may be trashed */ + ibool all) /*!< in: TRUE=decompress the whole page; + FALSE=verify but do not copy some + page header fields that should not change + after page creation */ + __attribute__((nonnull(1,2))); #ifdef UNIV_DEBUG /**********************************************************************//** @@ -385,8 +389,8 @@ IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a non-clustered index, the caller must update the insert buffer free bits in the same mini-transaction in such a way that the modification will be redo-logged. -@return TRUE on success, FALSE on failure; page and page_zip will be -left intact on failure. */ +@return TRUE on success, FALSE on failure; page_zip will be left +intact on failure, but page will be overwritten. */ UNIV_INTERN ibool page_zip_reorganize( diff --git a/storage/xtradb/include/pars0pars.h b/storage/xtradb/include/pars0pars.h index a7de7f2292e..fe5d76ebbb0 100644 --- a/storage/xtradb/include/pars0pars.h +++ b/storage/xtradb/include/pars0pars.h @@ -583,6 +583,12 @@ pars_info_get_bound_id( pars_info_t* info, /*!< in: info struct */ const char* name); /*!< in: bound id name to find */ +/******************************************************************//** +Release any resources used by the lexer. */ +UNIV_INTERN +void +pars_lexer_close(void); +/*==================*/ /** Extra information supplied for pars_sql(). */ struct pars_info_struct { diff --git a/storage/xtradb/include/rem0cmp.h b/storage/xtradb/include/rem0cmp.h index 9d2950bbb01..421308af49b 100644 --- a/storage/xtradb/include/rem0cmp.h +++ b/storage/xtradb/include/rem0cmp.h @@ -89,7 +89,7 @@ cmp_dfield_dfield( /*************************************************************//** This function is used to compare a data tuple to a physical record. Only dtuple->n_fields_cmp first fields are taken into account for -the the data tuple! If we denote by n = n_fields_cmp, then rec must +the data tuple! If we denote by n = n_fields_cmp, then rec must have either m >= n fields, or it must differ from dtuple in some of the m fields rec has. If rec has an externally stored field we do not compare it but return with value 0 if such a comparison should be diff --git a/storage/xtradb/include/rem0rec.ic b/storage/xtradb/include/rem0rec.ic index 9fe736f9b0b..8e5bd9a7fcd 100644 --- a/storage/xtradb/include/rem0rec.ic +++ b/storage/xtradb/include/rem0rec.ic @@ -65,7 +65,7 @@ most significant bytes and bits are written below less significant. - offset_of_this_record) mod 64Ki, where mod is the modulo as a non-negative number; - we can calculate the the offset of the next + we can calculate the offset of the next record with the formula: relative_offset + offset_of_this_record mod UNIV_PAGE_SIZE diff --git a/storage/xtradb/include/row0ins.h b/storage/xtradb/include/row0ins.h index 530622e6225..9f93565ddb7 100644 --- a/storage/xtradb/include/row0ins.h +++ b/storage/xtradb/include/row0ins.h @@ -45,7 +45,7 @@ row_ins_check_foreign_constraint( /*=============================*/ ibool check_ref,/*!< in: TRUE If we want to check that the referenced table is ok, FALSE if we - want to to check the foreign key table */ + want to check the foreign key table */ dict_foreign_t* foreign,/*!< in: foreign constraint; NOTE that the tables mentioned in it must be in the dictionary cache if they exist at all */ diff --git a/storage/xtradb/include/row0mysql.h b/storage/xtradb/include/row0mysql.h index 97028622505..b05241f00f8 100644 --- a/storage/xtradb/include/row0mysql.h +++ b/storage/xtradb/include/row0mysql.h @@ -177,7 +177,9 @@ row_update_prebuilt_trx( in MySQL handle */ trx_t* trx); /*!< in: transaction handle */ /*********************************************************************//** -Unlocks AUTO_INC type locks that were possibly reserved by a trx. */ +Unlocks AUTO_INC type locks that were possibly reserved by a trx. This +function should be called at the the end of an SQL statement, by the +connection thread that owns the transaction (trx->mysql_thd). */ UNIV_INTERN void row_unlock_table_autoinc_for_mysql( @@ -754,8 +756,6 @@ struct row_prebuilt_struct { store it here so that we can return it to MySQL */ /*----------------------*/ - UT_LIST_NODE_T(row_prebuilt_t) prebuilts; - /*!< list node of table->prebuilts */ ulint magic_n2; /*!< this should be the same as magic_n */ }; diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index d98e2ad5013..dc136d89b8c 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -80,6 +80,9 @@ at a time */ #define SRV_AUTO_EXTEND_INCREMENT \ (srv_auto_extend_increment * ((1024 * 1024) / UNIV_PAGE_SIZE)) +/* prototypes for new functions added to ha_innodb.cc */ +ibool innobase_get_slow_log(); + /* This is set to TRUE if the MySQL user has set it in MySQL */ extern ibool srv_lower_case_table_names; @@ -133,8 +136,9 @@ extern ulint* srv_data_file_is_raw_partition; extern ibool srv_extra_undoslots; extern ibool srv_fast_recovery; +extern ibool srv_recovery_stats; -extern ibool srv_use_purge_thread; +extern ulint srv_use_purge_thread; extern ibool srv_auto_extend_last_data_file; extern ulint srv_last_file_size_max; @@ -235,12 +239,14 @@ extern ulong srv_replication_delay; extern long long srv_ibuf_max_size; extern ulong srv_ibuf_active_contract; extern ulong srv_ibuf_accel_rate; +extern ulint srv_checkpoint_age_target; extern ulong srv_flush_neighbor_pages; extern ulong srv_enable_unsafe_group_commit; extern ulong srv_read_ahead; extern ulong srv_adaptive_checkpoint; extern ulong srv_expand_import; +extern ulint srv_relax_table_creation; extern ulong srv_extra_rsegments; extern ulong srv_dict_size_limit; @@ -345,10 +351,6 @@ extern ulint srv_buf_pool_flushed; /** Number of buffer pool reads that led to the reading of a disk page */ extern ulint srv_buf_pool_reads; -/** Number of sequential read-aheads */ -extern ulint srv_read_ahead_seq; -/** Number of random read-aheads */ -extern ulint srv_read_ahead_rnd; /** Status variables to be passed to MySQL */ typedef struct export_var_struct export_struc; @@ -428,6 +430,7 @@ enum srv_thread_type { SRV_INSERT, /**< thread flushing the insert buffer to disk */ #endif SRV_PURGE, /* thread purging undo records */ + SRV_PURGE_WORKER, /* thread purging undo records */ SRV_MASTER /**< the master thread, (whose type number must be biggest) */ }; @@ -446,7 +449,7 @@ void srv_init(void); /*==========*/ /*********************************************************************//** -Frees the OS fast mutex created in srv_boot(). */ +Frees the data structures created in srv_init(). */ UNIV_INTERN void srv_free(void); @@ -509,6 +512,13 @@ srv_purge_thread( /*=============*/ void* arg); /* in: a dummy parameter required by os_thread_create */ +/************************************************************************* +The undo purge thread. */ +UNIV_INTERN +os_thread_ret_t +srv_purge_worker_thread( +/*====================*/ + void* arg); /*******************************************************************//** Tells the Innobase server that there has been activity in the database and wakes up the master thread if it is suspended (not sleeping). Used @@ -645,13 +655,13 @@ struct export_var_struct{ #ifdef UNIV_DEBUG ulint innodb_buffer_pool_pages_latched; /*!< Latched pages */ #endif /* UNIV_DEBUG */ - ulint innodb_buffer_pool_read_requests; /*!< buf_pool->n_page_gets */ + ulint innodb_buffer_pool_read_requests; /*!< buf_pool->stat.n_page_gets */ ulint innodb_buffer_pool_reads; /*!< srv_buf_pool_reads */ ulint innodb_buffer_pool_wait_free; /*!< srv_buf_pool_wait_free */ ulint innodb_buffer_pool_pages_flushed; /*!< srv_buf_pool_flushed */ ulint innodb_buffer_pool_write_requests;/*!< srv_buf_pool_write_requests */ - ulint innodb_buffer_pool_read_ahead_seq;/*!< srv_read_ahead_seq */ - ulint innodb_buffer_pool_read_ahead_rnd;/*!< srv_read_ahead_rnd */ + ulint innodb_buffer_pool_read_ahead; /*!< srv_read_ahead */ + ulint innodb_buffer_pool_read_ahead_evicted;/*!< srv_read_ahead evicted*/ ulint innodb_dblwr_pages_written; /*!< srv_dblwr_pages_written */ ulint innodb_dblwr_writes; /*!< srv_dblwr_writes */ ibool innodb_have_atomic_builtins; /*!< HAVE_ATOMIC_BUILTINS */ @@ -663,9 +673,9 @@ struct export_var_struct{ ulint innodb_os_log_pending_writes; /*!< srv_os_log_pending_writes */ ulint innodb_os_log_pending_fsyncs; /*!< fil_n_pending_log_flushes */ ulint innodb_page_size; /*!< UNIV_PAGE_SIZE */ - ulint innodb_pages_created; /*!< buf_pool->n_pages_created */ - ulint innodb_pages_read; /*!< buf_pool->n_pages_read */ - ulint innodb_pages_written; /*!< buf_pool->n_pages_written */ + ulint innodb_pages_created; /*!< buf_pool->stat.n_pages_created */ + ulint innodb_pages_read; /*!< buf_pool->stat.n_pages_read */ + ulint innodb_pages_written; /*!< buf_pool->stat.n_pages_written */ ulint innodb_row_lock_waits; /*!< srv_n_lock_wait_count */ ulint innodb_row_lock_current_waits; /*!< srv_n_lock_wait_current_count */ ib_int64_t innodb_row_lock_time; /*!< srv_n_lock_wait_time diff --git a/storage/xtradb/include/sync0rw.h b/storage/xtradb/include/sync0rw.h index aedfd5f3f86..85fa014d77a 100644 --- a/storage/xtradb/include/sync0rw.h +++ b/storage/xtradb/include/sync0rw.h @@ -120,7 +120,7 @@ is necessary only if the memory block containing it is freed. */ # endif /* UNIV_SYNC_DEBUG */ #else /* UNIV_DEBUG */ # define rw_lock_create(L, level) \ - rw_lock_create_func((L), __FILE__, __LINE__) + rw_lock_create_func((L), #L, NULL, 0) #endif /* UNIV_DEBUG */ /******************************************************************//** @@ -137,8 +137,8 @@ rw_lock_create_func( # ifdef UNIV_SYNC_DEBUG ulint level, /*!< in: level */ # endif /* UNIV_SYNC_DEBUG */ - const char* cmutex_name, /*!< in: mutex name */ #endif /* UNIV_DEBUG */ + const char* cmutex_name, /*!< in: mutex name */ const char* cfile_name, /*!< in: file name where created */ ulint cline); /*!< in: file line where created */ /******************************************************************//** @@ -540,7 +540,8 @@ struct rw_lock_struct { ulint level; /*!< Level in the global latching order. */ #endif /* UNIV_SYNC_DEBUG */ ulint count_os_wait; /*!< Count of os_waits. May not be accurate */ - const char* cfile_name;/*!< File name where lock created */ + //const char* cfile_name;/*!< File name where lock created */ + const char* lock_name;/*!< lock name */ /* last s-lock file/line is not guaranteed to be correct */ const char* last_s_file_name;/*!< File name where last s-locked */ const char* last_x_file_name;/*!< File name where last x-locked */ @@ -551,7 +552,7 @@ struct rw_lock_struct { are at the start of this struct, thus we can peek this field without causing much memory bus traffic */ - unsigned cline:14; /*!< Line where created */ + //unsigned cline:14; /*!< Line where created */ unsigned last_s_line:14; /*!< Line number where last time s-locked */ unsigned last_x_line:14; /*!< Line number where last time x-locked */ ulint magic_n; /*!< RW_LOCK_MAGIC_N */ diff --git a/storage/xtradb/include/sync0sync.h b/storage/xtradb/include/sync0sync.h index 966bcced722..c653e44b5bd 100644 --- a/storage/xtradb/include/sync0sync.h +++ b/storage/xtradb/include/sync0sync.h @@ -80,7 +80,7 @@ necessary only if the memory block containing it is freed. */ # endif #else # define mutex_create(M, level) \ - mutex_create_func((M), __FILE__, __LINE__) + mutex_create_func((M), #M, NULL, 0) #endif /******************************************************************//** @@ -93,8 +93,8 @@ void mutex_create_func( /*==============*/ mutex_t* mutex, /*!< in: pointer to memory */ -#ifdef UNIV_DEBUG const char* cmutex_name, /*!< in: mutex name */ +#ifdef UNIV_DEBUG # ifdef UNIV_SYNC_DEBUG ulint level, /*!< in: level */ # endif /* UNIV_SYNC_DEBUG */ @@ -513,7 +513,7 @@ struct mutex_struct { os_fast_mutex; /*!< We use this OS mutex in place of lock_word when atomic operations are not enabled */ #endif - ulint waiters; /*!< This ulint is set to 1 if there are (or + volatile ulint waiters; /*!< This ulint is set to 1 if there are (or may be) threads waiting in the global wait array for this mutex to be released. Otherwise, this is 0. */ @@ -524,9 +524,9 @@ struct mutex_struct { ulint line; /*!< Line where the mutex was locked */ ulint level; /*!< Level in the global latching order */ #endif /* UNIV_SYNC_DEBUG */ +#ifdef UNIV_DEBUG const char* cfile_name;/*!< File name where mutex created */ ulint cline; /*!< Line where created */ -#ifdef UNIV_DEBUG os_thread_id_t thread_id; /*!< The thread id of the thread which locked the mutex. */ ulint magic_n; /*!< MUTEX_MAGIC_N */ @@ -541,9 +541,9 @@ struct mutex_struct { ulong count_os_yield; /*!< count of os_wait */ ulonglong lspent_time; /*!< mutex os_wait timer msec */ ulonglong lmax_spent_time;/*!< mutex os_wait timer msec */ - const char* cmutex_name; /*!< mutex name */ ulint mutex_type; /*!< 0=usual mutex, 1=rw_lock mutex */ #endif /* UNIV_DEBUG */ + const char* cmutex_name; /*!< mutex name */ }; /** The global array of wait cells for implementation of the databases own diff --git a/storage/xtradb/include/thr0loc.h b/storage/xtradb/include/thr0loc.h index 34c5232238d..293d1ebd57f 100644 --- a/storage/xtradb/include/thr0loc.h +++ b/storage/xtradb/include/thr0loc.h @@ -39,6 +39,12 @@ UNIV_INTERN void thr_local_init(void); /*================*/ + /****************************************************************//** +Close the thread local storage module. */ +UNIV_INTERN +void +thr_local_close(void); +/*=================*/ /*******************************************************************//** Creates a local storage struct for the calling new thread. */ UNIV_INTERN diff --git a/storage/xtradb/include/trx0i_s.h b/storage/xtradb/include/trx0i_s.h index 9bf032de9f9..7bd4e1b88c8 100644 --- a/storage/xtradb/include/trx0i_s.h +++ b/storage/xtradb/include/trx0i_s.h @@ -141,6 +141,13 @@ void trx_i_s_cache_init( /*===============*/ trx_i_s_cache_t* cache); /*!< out: cache to init */ +/*******************************************************************//** +Free the INFORMATION SCHEMA trx related cache. */ +UNIV_INTERN +void +trx_i_s_cache_free( +/*===============*/ + trx_i_s_cache_t* cache); /*!< in/out: cache to free */ /*******************************************************************//** Issue a shared/read lock on the tables cache. */ diff --git a/storage/xtradb/include/trx0purge.h b/storage/xtradb/include/trx0purge.h index 7812ad7eb92..ae5bc6f90be 100644 --- a/storage/xtradb/include/trx0purge.h +++ b/storage/xtradb/include/trx0purge.h @@ -71,6 +71,12 @@ void trx_purge_sys_create(void); /*======================*/ /********************************************************************//** +Frees the global purge system control structure. */ +UNIV_INTERN +void +trx_purge_sys_close(void); +/*======================*/ +/************************************************************************ Adds the update undo log as the first log in the history list. Removes the update undo log segment from the rseg slot if it is too big for reuse. */ UNIV_INTERN @@ -108,6 +114,25 @@ UNIV_INTERN ulint trx_purge(void); /*===========*/ +/********************************************************************** +This function runs a purge worker batch */ +UNIV_INTERN +void +trx_purge_worker( +/*=============*/ + ulint worker_id); +/********************************************************************** +This function waits the event for worker batch */ +UNIV_INTERN +void +trx_purge_worker_wait(void); +/*========================*/ +/********************************************************************** +This function wakes the waiting worker batch */ +UNIV_INTERN +void +trx_purge_worker_wake(void); +/*========================*/ /******************************************************************//** Prints information of the purge system to stderr. */ UNIV_INTERN @@ -125,6 +150,11 @@ struct trx_purge_struct{ of the trx system and it never ends */ que_t* query; /*!< The query graph which will do the parallelized purge operation */ + ulint n_worker; + os_event_t worker_event; + sess_t** sess_arr; + trx_t** trx_arr; + que_t** query_arr; rw_lock_t latch; /*!< The latch protecting the purge view. A purge operation must acquire an x-latch here for the instant at which diff --git a/storage/xtradb/include/trx0rec.h b/storage/xtradb/include/trx0rec.h index 0ae82c33afe..a6e56e963c6 100644 --- a/storage/xtradb/include/trx0rec.h +++ b/storage/xtradb/include/trx0rec.h @@ -44,8 +44,8 @@ UNIV_INLINE trx_undo_rec_t* trx_undo_rec_copy( /*==============*/ - trx_undo_rec_t* undo_rec, /*!< in: undo log record */ - mem_heap_t* heap); /*!< in: heap where copied */ + const trx_undo_rec_t* undo_rec, /*!< in: undo log record */ + mem_heap_t* heap); /*!< in: heap where copied */ /**********************************************************************//** Reads the undo log record type. @return record type */ diff --git a/storage/xtradb/include/trx0rec.ic b/storage/xtradb/include/trx0rec.ic index 037b5d4f6cf..e7e41d6d9f6 100644 --- a/storage/xtradb/include/trx0rec.ic +++ b/storage/xtradb/include/trx0rec.ic @@ -100,8 +100,8 @@ UNIV_INLINE trx_undo_rec_t* trx_undo_rec_copy( /*==============*/ - trx_undo_rec_t* undo_rec, /*!< in: undo log record */ - mem_heap_t* heap) /*!< in: heap where copied */ + const trx_undo_rec_t* undo_rec, /*!< in: undo log record */ + mem_heap_t* heap) /*!< in: heap where copied */ { ulint len; diff --git a/storage/xtradb/include/trx0roll.h b/storage/xtradb/include/trx0roll.h index ddca9e9e4ef..1dee5655c8c 100644 --- a/storage/xtradb/include/trx0roll.h +++ b/storage/xtradb/include/trx0roll.h @@ -133,6 +133,17 @@ trx_rollback( Rollback or clean up any incomplete transactions which were encountered in crash recovery. If the transaction already was committed, then we clean up a possible insert undo log. If the +transaction was not yet committed, then we roll it back. */ +UNIV_INTERN +void +trx_rollback_or_clean_recovered( +/*============================*/ + ibool all); /*!< in: FALSE=roll back dictionary transactions; + TRUE=roll back all non-PREPARED transactions */ +/*******************************************************************//** +Rollback or clean up any incomplete transactions which were +encountered in crash recovery. If the transaction already was +committed, then we clean up a possible insert undo log. If the transaction was not yet committed, then we roll it back. Note: this is done in a background thread. @return a dummy parameter */ @@ -208,9 +219,9 @@ int trx_general_rollback_for_mysql( /*===========================*/ trx_t* trx, /*!< in: transaction handle */ - ibool partial,/*!< in: TRUE if partial rollback requested */ trx_savept_t* savept);/*!< in: pointer to savepoint undo number, if - partial rollback requested */ + partial rollback requested, or NULL for + complete rollback */ /*******************************************************************//** Rolls back a transaction back to a named savepoint. Modifications after the savepoint are undone but InnoDB does NOT release the corresponding locks diff --git a/storage/xtradb/include/trx0rseg.h b/storage/xtradb/include/trx0rseg.h index f0f7a47279e..0d7dc60329f 100644 --- a/storage/xtradb/include/trx0rseg.h +++ b/storage/xtradb/include/trx0rseg.h @@ -125,6 +125,13 @@ trx_rseg_create( ulint max_size, /*!< in: max size in pages */ ulint* id, /*!< out: rseg id */ mtr_t* mtr); /*!< in: mtr */ +/*************************************************************************** +Free's an instance of the rollback segment in memory. */ +UNIV_INTERN +void +trx_rseg_mem_free( +/*==============*/ + trx_rseg_t* rseg); /* in, own: instance to free */ /* Real max value may be 4076 in usual. But reserve 4 slot for safety or etc... */ diff --git a/storage/xtradb/include/trx0sys.h b/storage/xtradb/include/trx0sys.h index 1f3f11926f8..8789d575bca 100644 --- a/storage/xtradb/include/trx0sys.h +++ b/storage/xtradb/include/trx0sys.h @@ -344,6 +344,12 @@ void trx_sys_file_format_tag_init(void); /*==============================*/ /*****************************************************************//** +Shutdown/Close the transaction system. */ +UNIV_INTERN +void +trx_sys_close(void); +/*===============*/ +/*****************************************************************//** Get the name representation of the file format from its id. @return pointer to the name */ UNIV_INTERN diff --git a/storage/xtradb/include/trx0sys.ic b/storage/xtradb/include/trx0sys.ic index 1c7c732751b..820d31d0692 100644 --- a/storage/xtradb/include/trx0sys.ic +++ b/storage/xtradb/include/trx0sys.ic @@ -34,11 +34,11 @@ typedef byte trx_sysf_rseg_t; /* Rollback segment specification slot offsets */ /*-------------------------------------------------------------*/ -#define TRX_SYS_RSEG_SPACE 0 /* space where the the segment +#define TRX_SYS_RSEG_SPACE 0 /* space where the segment header is placed; starting with MySQL/InnoDB 5.1.7, this is UNIV_UNDEFINED if the slot is unused */ -#define TRX_SYS_RSEG_PAGE_NO 4 /* page number where the the segment +#define TRX_SYS_RSEG_PAGE_NO 4 /* page number where the segment header is placed; this is FIL_NULL if the slot is unused */ /*-------------------------------------------------------------*/ diff --git a/storage/xtradb/include/trx0trx.h b/storage/xtradb/include/trx0trx.h index da031d9d5d3..3b845e498d0 100644 --- a/storage/xtradb/include/trx0trx.h +++ b/storage/xtradb/include/trx0trx.h @@ -179,7 +179,7 @@ trx_commit_off_kernel( /****************************************************************//** Cleans up a transaction at database startup. The cleanup is needed if the transaction already got to the middle of a commit when the database -crashed, andf we cannot roll it back. */ +crashed, and we cannot roll it back. */ UNIV_INTERN void trx_cleanup_at_db_startup( @@ -360,7 +360,7 @@ enum trx_dict_op { operation modes in crash recovery. */ TRX_DICT_OP_TABLE = 1, /** The transaction is creating or dropping an index in an - existing table. In crash recovery, the the data dictionary + existing table. In crash recovery, the data dictionary must be locked, but the table must not be dropped. */ TRX_DICT_OP_INDEX = 2 }; @@ -729,6 +729,17 @@ struct trx_struct{ /*------------------------------*/ char detailed_error[256]; /*!< detailed error message for last error, or empty. */ + /*------------------------------*/ + ulint io_reads; + ib_uint64_t io_read; + ulint io_reads_wait_timer; + ib_uint64_t lock_que_wait_ustarted; + ulint lock_que_wait_timer; + ulint innodb_que_wait_timer; + ulint distinct_page_access; +#define DPAH_SIZE 8192 + byte* distinct_page_access_hash; + ibool take_stats; }; #define TRX_MAX_N_THREADS 32 /* maximum number of diff --git a/storage/xtradb/include/trx0types.h b/storage/xtradb/include/trx0types.h index 08cc9622d02..24cf57d53d5 100644 --- a/storage/xtradb/include/trx0types.h +++ b/storage/xtradb/include/trx0types.h @@ -70,7 +70,7 @@ typedef struct trx_named_savept_struct trx_named_savept_t; enum trx_rb_ctx { RB_NONE = 0, /*!< no rollback */ RB_NORMAL, /*!< normal rollback */ - RB_RECOVERY, /*!< rolling back an incomplete transaction, + RB_RECOVERY /*!< rolling back an incomplete transaction, in crash recovery */ }; diff --git a/storage/xtradb/include/trx0undo.h b/storage/xtradb/include/trx0undo.h index 4db10eaa92e..a084f2394b5 100644 --- a/storage/xtradb/include/trx0undo.h +++ b/storage/xtradb/include/trx0undo.h @@ -333,6 +333,13 @@ trx_undo_parse_discard_latest( byte* end_ptr,/*!< in: buffer end */ page_t* page, /*!< in: page or NULL */ mtr_t* mtr); /*!< in: mtr or NULL */ +/************************************************************************ +Frees an undo log memory copy. */ +UNIV_INTERN +void +trx_undo_mem_free( +/*==============*/ + trx_undo_t* undo); /* in: the undo object to be freed */ /* Types of an undo log segment */ #define TRX_UNDO_INSERT 1 /* contains undo entries for inserts */ diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index 5ccb7c39551..26097941c5f 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -46,12 +46,12 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 1 #define INNODB_VERSION_MINOR 0 -#define INNODB_VERSION_BUGFIX 4 -#define PERCONA_INNODB_VERSION 8 +#define INNODB_VERSION_BUGFIX 6 +#define PERCONA_INNODB_VERSION 9 /* The following is the InnoDB version as shown in SELECT plugin_version FROM information_schema.plugins; -calculated in in make_version_string() in sql/sql_show.cc like this: +calculated in make_version_string() in sql/sql_show.cc like this: "version >> 8" . "version & 0xff" because the version is shown with only one dot, we skip the last component, i.e. we show M.N.P as M.N */ @@ -59,13 +59,14 @@ component, i.e. we show M.N.P as M.N */ (INNODB_VERSION_MAJOR << 8 | INNODB_VERSION_MINOR) /* auxiliary macros to help creating the version as string */ -#define __INNODB_VERSION(a, b, c, d) (#a "." #b "." #c "-" #d) -#define _INNODB_VERSION(a, b, c, d) __INNODB_VERSION(a, b, c, d) +#define __INNODB_VERSION(a, b, c, d) (#a "." #b "." #c "-" #d) +#define _INNODB_VERSION(a, b, c, d) __INNODB_VERSION(a, b, c, d) + #define INNODB_VERSION_STR \ _INNODB_VERSION(INNODB_VERSION_MAJOR, \ INNODB_VERSION_MINOR, \ - INNODB_VERSION_BUGFIX, \ + INNODB_VERSION_BUGFIX, \ PERCONA_INNODB_VERSION) #define REFMAN "http://dev.mysql.com/doc/refman/5.1/en/" @@ -80,17 +81,25 @@ the virtual method table (vtable) in GCC 3. */ # define ha_innobase ha_innodb #endif /* MYSQL_DYNAMIC_PLUGIN */ +/* if any of the following macros is defined at this point this means +that the code from the "right" plug.in was executed and we do not +need to include ut0auxconf.h which would either define the same macros +or will be empty */ +#if !defined(HAVE_IB_GCC_ATOMIC_BUILTINS) \ + && !defined(HAVE_IB_ATOMIC_PTHREAD_T_GCC) \ + && !defined(HAVE_IB_SOLARIS_ATOMICS) \ + && !defined(HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS) \ + && !defined(SIZEOF_PTHREAD_T) \ + && !defined(HAVE_IB_PAUSE_INSTRUCTION) +# include "ut0auxconf.h" +#endif + #if (defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)) && !defined(MYSQL_SERVER) && !defined(__WIN__) # undef __WIN__ # define __WIN__ # include <windows.h> -# if defined(HAVE_WINDOWS_ATOMICS) -/* If atomics are defined we use them in InnoDB mutex implementation */ -# define HAVE_ATOMIC_BUILTINS -# endif /* HAVE_WINDOWS_ATOMICS */ - # ifdef _NT_ # define __NT__ # endif @@ -113,45 +122,17 @@ if we are compiling on Windows. */ # include <sys/mman.h> /* mmap() for os0proc.c */ # endif -# undef PACKAGE -# undef VERSION - /* Include the header file generated by GNU autoconf */ # ifndef __WIN__ -#ifndef UNIV_HOTBACKUP -# include "config.h" -#endif /* UNIV_HOTBACKUP */ +# ifndef UNIV_HOTBACKUP +# include "config.h" +# endif /* UNIV_HOTBACKUP */ # endif # ifdef HAVE_SCHED_H # include <sched.h> # endif -# if defined(HAVE_GCC_ATOMIC_BUILTINS) || defined(HAVE_SOLARIS_ATOMICS) \ - || defined(HAVE_WINDOWS_ATOMICS) -/* If atomics are defined we use them in InnoDB mutex implementation */ -# define HAVE_ATOMIC_BUILTINS -# endif /* (HAVE_GCC_ATOMIC_BUILTINS) || (HAVE_SOLARIS_ATOMICS) - || (HAVE_WINDOWS_ATOMICS) */ - -/* For InnoDB rw_locks to work with atomics we need the thread_id -to be no more than machine word wide. The following enables using -atomics for InnoDB rw_locks where these conditions are met. */ -#ifdef HAVE_ATOMIC_BUILTINS -/* if HAVE_ATOMIC_PTHREAD_T is defined at this point that means that -the code from plug.in has defined it and we do not need to include -ut0auxconf.h which would either define HAVE_ATOMIC_PTHREAD_T or will -be empty */ -# ifndef HAVE_ATOMIC_PTHREAD_T -# include "ut0auxconf.h" -# endif /* HAVE_ATOMIC_PTHREAD_T */ -/* now HAVE_ATOMIC_PTHREAD_T is eventually defined either by plug.in or -from Makefile.in->ut0auxconf.h */ -# ifdef HAVE_ATOMIC_PTHREAD_T -# define INNODB_RW_LOCKS_USE_ATOMICS -# endif /* HAVE_ATOMIC_PTHREAD_T */ -#endif /* HAVE_ATOMIC_BUILTINS */ - /* We only try to do explicit inlining of functions with gcc and Sun Studio */ @@ -198,12 +179,18 @@ command. Not tested on Windows. */ debugging without UNIV_DEBUG */ #define UNIV_DEBUG /* Enable ut_ad() assertions and disable UNIV_INLINE */ +#define UNIV_DEBUG_LOCK_VALIDATE /* Enable + ut_ad(lock_rec_validate_page()) + assertions. */ #define UNIV_DEBUG_FILE_ACCESSES /* Debug .ibd file access (field file_page_was_freed in buf_page_t) */ #define UNIV_LRU_DEBUG /* debug the buffer pool LRU */ #define UNIV_HASH_DEBUG /* debug HASH_ macros */ #define UNIV_LIST_DEBUG /* debug UT_LIST_ macros */ +#define UNIV_LOG_LSN_DEBUG /* write LSN to the redo log; +this will break redo log file compatibility, but it may be useful when +debugging redo log application problems. */ #define UNIV_MEM_DEBUG /* detect memory leaks etc */ #define UNIV_IBUF_DEBUG /* debug the insert buffer */ #define UNIV_IBUF_COUNT_DEBUG /* debug the insert buffer; @@ -253,7 +240,7 @@ by one. */ /* Linkage specifier for non-static InnoDB symbols (variables and functions) that are only referenced from within InnoDB, not from MySQL */ -#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(UNIV_HOTBACKUP) +#if defined(__GNUC__) && (__GNUC__ >= 4) || defined(__INTEL_COMPILER) # define UNIV_INTERN __attribute__((visibility ("hidden"))) #else # define UNIV_INTERN @@ -410,7 +397,9 @@ it is read. */ /* Minimize cache-miss latency by moving data at addr into a cache before it is read or written. */ # define UNIV_PREFETCH_RW(addr) __builtin_prefetch(addr, 1, 3) -#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) +/* Sun Studio includes sun_prefetch.h as of version 5.9 */ +#elif (defined(__SUNPRO_C) && __SUNPRO_C >= 0x590) \ + || (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x590) # include <sun_prefetch.h> #if __SUNPRO_C >= 0x550 # undef UNIV_INTERN diff --git a/storage/xtradb/include/usr0sess.h b/storage/xtradb/include/usr0sess.h index 7638a0c69e2..2c288f7d455 100644 --- a/storage/xtradb/include/usr0sess.h +++ b/storage/xtradb/include/usr0sess.h @@ -44,14 +44,12 @@ sess_t* sess_open(void); /*============*/ /*********************************************************************//** -Closes a session, freeing the memory occupied by it, if it is in a state -where it should be closed. -@return TRUE if closed */ +Closes a session, freeing the memory occupied by it. */ UNIV_INTERN -ibool -sess_try_close( -/*===========*/ - sess_t* sess); /*!< in, own: session object */ +void +sess_close( +/*=======*/ + sess_t* sess); /* in, own: session object */ /* The session handle. All fields are protected by the kernel mutex */ struct sess_struct{ diff --git a/storage/xtradb/include/ut0auxconf.h b/storage/xtradb/include/ut0auxconf.h index 88fb26f1863..16bcc308392 100644 --- a/storage/xtradb/include/ut0auxconf.h +++ b/storage/xtradb/include/ut0auxconf.h @@ -1,14 +1,14 @@ /* Do not remove this file even though it is empty. This file is included in univ.i and will cause compilation failure if not present. -A custom check has been added in the generated +A custom checks have been added in the generated storage/innobase/Makefile.in that is shipped with the InnoDB Plugin -source archive. This check tries to compile a test program and if -successful then adds "#define HAVE_ATOMIC_PTHREAD_T" to this file. -This is a hack that has been developed in order to check for pthread_t -atomicity without the need to regenerate the ./configure script that is +source archive. These checks eventually define some macros and put +them in this file. +This is a hack that has been developed in order to deploy new compile +time checks without the need to regenerate the ./configure script that is distributed in the MySQL 5.1 official source archives. If by any chance Makefile.in and ./configure are regenerated and thus -the hack from Makefile.in wiped away then the "real" check from plug.in +the hack from Makefile.in wiped away then the "real" checks from plug.in will take over. */ diff --git a/storage/xtradb/include/ut0byte.h b/storage/xtradb/include/ut0byte.h index a2687e62f08..f55e2888c60 100644 --- a/storage/xtradb/include/ut0byte.h +++ b/storage/xtradb/include/ut0byte.h @@ -219,8 +219,8 @@ UNIV_INLINE void* ut_align( /*=====*/ - void* ptr, /*!< in: pointer */ - ulint align_no); /*!< in: align by this number */ + const void* ptr, /*!< in: pointer */ + ulint align_no); /*!< in: align by this number */ /*********************************************************//** The following function rounds down a pointer to the nearest aligned address. diff --git a/storage/xtradb/include/ut0byte.ic b/storage/xtradb/include/ut0byte.ic index e3beed65138..3dd51890cb4 100644 --- a/storage/xtradb/include/ut0byte.ic +++ b/storage/xtradb/include/ut0byte.ic @@ -319,8 +319,8 @@ UNIV_INLINE void* ut_align( /*=====*/ - void* ptr, /*!< in: pointer */ - ulint align_no) /*!< in: align by this number */ + const void* ptr, /*!< in: pointer */ + ulint align_no) /*!< in: align by this number */ { ut_ad(align_no > 0); ut_ad(((align_no - 1) & align_no) == 0); diff --git a/storage/xtradb/include/ut0ut.h b/storage/xtradb/include/ut0ut.h index 80094321041..197b8401428 100644 --- a/storage/xtradb/include/ut0ut.h +++ b/storage/xtradb/include/ut0ut.h @@ -34,6 +34,11 @@ Created 1/20/1994 Heikki Tuuri #define ut0ut_h #include "univ.i" + +#ifndef UNIV_HOTBACKUP +# include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */ +#endif /* UNIV_HOTBACKUP */ + #include <time.h> #ifndef MYSQL_SERVER #include <ctype.h> @@ -47,7 +52,8 @@ Created 1/20/1994 Heikki Tuuri /** Time stamp */ typedef time_t ib_time_t; -#if defined(IB_HAVE_PAUSE_INSTRUCTION) +#ifndef UNIV_HOTBACKUP +#if defined(HAVE_IB_PAUSE_INSTRUCTION) # ifdef WIN32 /* In the Win32 API, the x86 PAUSE instruction is executed by calling the YieldProcessor macro defined in WinNT.h. It is a CPU architecture- @@ -84,6 +90,7 @@ do { \ os_thread_sleep(2000 /* 2 ms */); \ } \ } while (0) +#endif /* !UNIV_HOTBACKUP */ /********************************************************//** Gets the high 32 bits in a ulint. That is makes a shift >> 32, @@ -216,6 +223,7 @@ UNIV_INTERN ib_time_t ut_time(void); /*=========*/ +#ifndef UNIV_HOTBACKUP /**********************************************************//** Returns system time. Upon successful completion, the value 0 is returned; otherwise the @@ -239,6 +247,16 @@ ullint ut_time_us( /*=======*/ ullint* tloc); /*!< out: us since epoch, if non-NULL */ +/**********************************************************//** +Returns the number of milliseconds since some epoch. The +value may wrap around. It should only be used for heuristic +purposes. +@return ms since epoch */ +UNIV_INTERN +ulint +ut_time_ms(void); +/*============*/ +#endif /* !UNIV_HOTBACKUP */ /**********************************************************//** Returns the difference of two times in seconds. |