diff options
Diffstat (limited to 'storage/innobase/include')
213 files changed, 13893 insertions, 3857 deletions
diff --git a/storage/innobase/include/btr0btr.ic b/storage/innobase/include/btr0btr.ic index 55bdb289b21..6f7a66b12ac 100644 --- a/storage/innobase/include/btr0btr.ic +++ b/storage/innobase/include/btr0btr.ic @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -81,7 +81,7 @@ btr_page_set_index_id( index_id_t id, /*!< in: index id */ mtr_t* mtr) /*!< in: mtr */ { - if (UNIV_LIKELY_NULL(page_zip)) { + if (page_zip) { mach_write_to_8(page + (PAGE_HEADER + PAGE_INDEX_ID), id); page_zip_write_header(page_zip, page + (PAGE_HEADER + PAGE_INDEX_ID), @@ -156,7 +156,7 @@ btr_page_set_level( ut_ad(page && mtr); ut_ad(level <= BTR_MAX_NODE_LEVEL); - if (UNIV_LIKELY_NULL(page_zip)) { + if (page_zip) { mach_write_to_2(page + (PAGE_HEADER + PAGE_LEVEL), level); page_zip_write_header(page_zip, page + (PAGE_HEADER + PAGE_LEVEL), @@ -199,7 +199,7 @@ btr_page_set_next( { ut_ad(page && mtr); - if (UNIV_LIKELY_NULL(page_zip)) { + if (page_zip) { mach_write_to_4(page + FIL_PAGE_NEXT, next); page_zip_write_header(page_zip, page + FIL_PAGE_NEXT, 4, mtr); } else { @@ -236,7 +236,7 @@ btr_page_set_prev( { ut_ad(page && mtr); - if (UNIV_LIKELY_NULL(page_zip)) { + if (page_zip) { mach_write_to_4(page + FIL_PAGE_PREV, prev); page_zip_write_header(page_zip, page + FIL_PAGE_PREV, 4, mtr); } else { @@ -272,7 +272,7 @@ btr_node_ptr_get_child_page_no( page_no = mach_read_from_4(field); - if (UNIV_UNLIKELY(page_no == 0)) { + if (page_no == 0) { fprintf(stderr, "InnoDB: a 
nonsensical page number 0" " in a node ptr record at offset %lu\n", diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h index cbc6103c2ee..354b6982a13 100644 --- a/storage/innobase/include/btr0cur.h +++ b/storage/innobase/include/btr0cur.h @@ -57,9 +57,6 @@ page_cur_t* btr_cur_get_page_cur( /*=================*/ const btr_cur_t* cursor);/*!< in: tree cursor */ -#else /* UNIV_DEBUG */ -# define btr_cur_get_page_cur(cursor) (&(cursor)->page_cur) -#endif /* UNIV_DEBUG */ /*********************************************************//** Returns the buffer block on which the tree cursor is positioned. @return pointer to buffer block */ @@ -67,7 +64,7 @@ UNIV_INLINE buf_block_t* btr_cur_get_block( /*==============*/ - btr_cur_t* cursor);/*!< in: tree cursor */ + const btr_cur_t* cursor);/*!< in: tree cursor */ /*********************************************************//** Returns the record pointer of a tree cursor. @return pointer to record */ @@ -75,7 +72,12 @@ UNIV_INLINE rec_t* btr_cur_get_rec( /*============*/ - btr_cur_t* cursor);/*!< in: tree cursor */ + const btr_cur_t* cursor);/*!< in: tree cursor */ +#else /* UNIV_DEBUG */ +# define btr_cur_get_page_cur(cursor) (&(cursor)->page_cur) +# define btr_cur_get_block(cursor) ((cursor)->page_cur.block) +# define btr_cur_get_rec(cursor) ((cursor)->page_cur.rec) +#endif /* UNIV_DEBUG */ /*********************************************************//** Returns the compressed page on which the tree cursor is positioned. @return pointer to compressed page, or NULL if the page is not compressed */ @@ -101,12 +103,9 @@ btr_cur_get_page( btr_cur_t* cursor);/*!< in: tree cursor */ /*********************************************************//** Returns the index of a cursor. 
+@param cursor b-tree cursor @return index */ -UNIV_INLINE -dict_index_t* -btr_cur_get_index( -/*==============*/ - btr_cur_t* cursor);/*!< in: B-tree cursor */ +#define btr_cur_get_index(cursor) ((cursor)->index) /*********************************************************//** Positions a tree cursor at a given record. */ UNIV_INLINE @@ -474,7 +473,8 @@ btr_estimate_n_rows_in_range( /*******************************************************************//** Estimates the number of different key values in a given index, for each n-column prefix of the index where n <= dict_index_get_n_unique(index). -The estimates are stored in the array index->stat_n_diff_key_vals. +The estimates are stored in the array index->stat_n_diff_key_vals[] and +the number of pages that were sampled is saved in index->stat_n_sample_sizes[]. If innodb_stats_method is nulls_ignored, we also record the number of non-null values for each prefix and stored the estimates in array index->stat_n_non_null_key_vals. */ @@ -595,6 +595,23 @@ btr_copy_externally_stored_field_prefix( a lock or a page latch */ ulint local_len);/*!< in: length of data, in bytes */ /*******************************************************************//** +Copies an externally stored field of a record to mem heap. The +clustered index record must be protected by a lock or a page latch. 
+@return the whole field copied to heap */ +UNIV_INTERN +byte* +btr_copy_externally_stored_field( +/*=============================*/ + ulint* len, /*!< out: length of the whole field */ + const byte* data, /*!< in: 'internally' stored part of the + field containing also the reference to + the external part; must be protected by + a lock or a page latch */ + ulint zip_size,/*!< in: nonzero=compressed BLOB page size, + zero for uncompressed BLOBs */ + ulint local_len,/*!< in: length of data */ + mem_heap_t* heap); /*!< in: mem heap */ +/*******************************************************************//** Copies an externally stored field of a record to mem heap. @return the field copied to heap, or NULL if the field is incomplete */ UNIV_INTERN diff --git a/storage/innobase/include/btr0cur.ic b/storage/innobase/include/btr0cur.ic index e31f77c77eb..540417e3062 100644 --- a/storage/innobase/include/btr0cur.ic +++ b/storage/innobase/include/btr0cur.ic @@ -38,7 +38,7 @@ btr_cur_get_page_cur( { return(&((btr_cur_t*) cursor)->page_cur); } -#endif /* UNIV_DEBUG */ + /*********************************************************//** Returns the buffer block on which the tree cursor is positioned. @return pointer to buffer block */ @@ -46,7 +46,7 @@ UNIV_INLINE buf_block_t* btr_cur_get_block( /*==============*/ - btr_cur_t* cursor) /*!< in: tree cursor */ + const btr_cur_t* cursor) /*!< in: tree cursor */ { return(page_cur_get_block(btr_cur_get_page_cur(cursor))); } @@ -58,10 +58,11 @@ UNIV_INLINE rec_t* btr_cur_get_rec( /*============*/ - btr_cur_t* cursor) /*!< in: tree cursor */ + const btr_cur_t* cursor) /*!< in: tree cursor */ { - return(page_cur_get_rec(&(cursor->page_cur))); + return(page_cur_get_rec(btr_cur_get_page_cur(cursor))); } +#endif /* UNIV_DEBUG */ /*********************************************************//** Returns the compressed page on which the tree cursor is positioned. 
@@ -99,18 +100,6 @@ btr_cur_get_page( } /*********************************************************//** -Returns the index of a cursor. -@return index */ -UNIV_INLINE -dict_index_t* -btr_cur_get_index( -/*==============*/ - btr_cur_t* cursor) /*!< in: B-tree cursor */ -{ - return(cursor->index); -} - -/*********************************************************//** Positions a tree cursor at a given record. */ UNIV_INLINE void diff --git a/storage/innobase/include/btr0pcur.h b/storage/innobase/include/btr0pcur.h index 2ebd70a6f23..a8eaac4690b 100644 --- a/storage/innobase/include/btr0pcur.h +++ b/storage/innobase/include/btr0pcur.h @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -92,9 +92,10 @@ Initializes and opens a persistent cursor to an index tree. It should be closed with btr_pcur_close. 
*/ UNIV_INLINE void -btr_pcur_open_func( -/*===============*/ +btr_pcur_open_low( +/*==============*/ dict_index_t* index, /*!< in: index */ + ulint level, /*!< in: level in the btree */ const dtuple_t* tuple, /*!< in: tuple on which search done */ ulint mode, /*!< in: PAGE_CUR_L, ...; NOTE that if the search is made using a unique @@ -108,7 +109,7 @@ btr_pcur_open_func( ulint line, /*!< in: line where called */ mtr_t* mtr); /*!< in: mtr */ #define btr_pcur_open(i,t,md,l,c,m) \ - btr_pcur_open_func(i,t,md,l,c,__FILE__,__LINE__,m) + btr_pcur_open_low(i,0,t,md,l,c,__FILE__,__LINE__,m) /**************************************************************//** Opens an persistent cursor to an index tree without initializing the cursor. */ diff --git a/storage/innobase/include/btr0pcur.ic b/storage/innobase/include/btr0pcur.ic index 054ce753c7d..a27033c4a7c 100644 --- a/storage/innobase/include/btr0pcur.ic +++ b/storage/innobase/include/btr0pcur.ic @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -406,9 +406,10 @@ Initializes and opens a persistent cursor to an index tree. It should be closed with btr_pcur_close. 
*/ UNIV_INLINE void -btr_pcur_open_func( -/*===============*/ +btr_pcur_open_low( +/*==============*/ dict_index_t* index, /*!< in: index */ + ulint level, /*!< in: level in the btree */ const dtuple_t* tuple, /*!< in: tuple on which search done */ ulint mode, /*!< in: PAGE_CUR_L, ...; NOTE that if the search is made using a unique @@ -435,7 +436,7 @@ btr_pcur_open_func( btr_cursor = btr_pcur_get_btr_cur(cursor); - btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode, + btr_cur_search_to_nth_level(index, level, tuple, mode, latch_mode, btr_cursor, 0, file, line, mtr); cursor->pos_state = BTR_PCUR_IS_POSITIONED; diff --git a/storage/innobase/include/btr0sea.h b/storage/innobase/include/btr0sea.h index 1f920471f7d..5316c3efd39 100644 --- a/storage/innobase/include/btr0sea.h +++ b/storage/innobase/include/btr0sea.h @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/btr0sea.ic b/storage/innobase/include/btr0sea.ic index beadeeb8d02..49ba0fd3f0b 100644 --- a/storage/innobase/include/btr0sea.ic +++ b/storage/innobase/include/btr0sea.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/btr0types.h b/storage/innobase/include/btr0types.h index 5adc858b931..62b7868b419 100644 --- a/storage/innobase/include/btr0types.h +++ b/storage/innobase/include/btr0types.h @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h index 2d7d6146092..fab9a4b828b 100644 --- a/storage/innobase/include/buf0buddy.h +++ b/storage/innobase/include/buf0buddy.h @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -47,7 +47,7 @@ buf_buddy_alloc( buf_pool_t* buf_pool, /*!< in/out: buffer pool in which the page resides */ ulint size, /*!< in: compressed page size - (between PAGE_ZIP_MIN_SIZE and + (between UNIV_ZIP_SIZE_MIN and UNIV_PAGE_SIZE) */ ibool* lru) /*!< in: pointer to a variable that will be assigned TRUE if diff --git a/storage/innobase/include/buf0buddy.ic b/storage/innobase/include/buf0buddy.ic index b8281f7341a..be2f950162d 100644 --- a/storage/innobase/include/buf0buddy.ic +++ b/storage/innobase/include/buf0buddy.ic @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -77,7 +77,7 @@ buf_buddy_get_slot( ulint i; ulint s; - ut_ad(size >= PAGE_ZIP_MIN_SIZE); + ut_ad(size >= UNIV_ZIP_SIZE_MIN); for (i = 0, s = BUF_BUDDY_LOW; s < size; i++, s <<= 1) { } @@ -99,7 +99,7 @@ buf_buddy_alloc( buf_pool_t* buf_pool, /*!< in/out: buffer pool in which the page resides */ ulint size, /*!< in: compressed page size - (between PAGE_ZIP_MIN_SIZE and + (between UNIV_ZIP_SIZE_MIN and UNIV_PAGE_SIZE) */ ibool* lru) /*!< in: pointer to a variable that will be assigned TRUE if @@ -109,7 +109,7 @@ buf_buddy_alloc( { ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(ut_is_2pow(size)); - ut_ad(size >= PAGE_ZIP_MIN_SIZE); + ut_ad(size >= UNIV_ZIP_SIZE_MIN); ut_ad(size <= UNIV_PAGE_SIZE); return((byte*) buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), @@ -131,7 +131,7 @@ buf_buddy_free( { ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(ut_is_2pow(size)); - ut_ad(size >= PAGE_ZIP_MIN_SIZE); + ut_ad(size >= UNIV_ZIP_SIZE_MIN); ut_ad(size <= UNIV_PAGE_SIZE); buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size)); diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index d9e6801eb86..08e61c08004 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -36,6 +36,7 @@ Created 11/5/1995 Heikki Tuuri #ifndef UNIV_HOTBACKUP #include "ut0rbt.h" #include "os0proc.h" +#include "log0log.h" /** @name Modes for buf_page_get_gen */ /* @{ */ @@ -68,11 +69,18 @@ Created 11/5/1995 Heikki Tuuri position of the block. 
*/ /* @} */ -#define MAX_BUFFER_POOLS 64 /*!< The maximum number of buffer +#define MAX_BUFFER_POOLS_BITS 6 /*!< Number of bits to representing + a buffer pool ID */ + +#define MAX_BUFFER_POOLS (1 << MAX_BUFFER_POOLS_BITS) + /*!< The maximum number of buffer pools that can be defined */ -#define BUF_POOL_WATCH_SIZE 1 /*!< Maximum number of concurrent +#define BUF_POOL_WATCH_SIZE (srv_n_purge_threads + 1) + /*!< Maximum number of concurrent buffer pool watches */ +#define MAX_PAGE_HASH_LOCKS 1024 /*!< The maximum number of + page_hash locks */ extern buf_pool_t* buf_pool_ptr; /*!< The buffer pools of the database */ @@ -137,10 +145,12 @@ struct buf_pool_info_struct{ ulint n_pend_reads; /*!< buf_pool->n_pend_reads, pages pending read */ ulint n_pending_flush_lru; /*!< Pages pending flush in LRU */ + ulint n_pending_flush_single_page;/*!< Pages pending to be + flushed as part of single page + flushes issued by various user + threads */ ulint n_pending_flush_list; /*!< Pages pending flush in FLUSH LIST */ - ulint n_pending_flush_single_page;/*!< Pages pending flush in - BUF_FLUSH_SINGLE_PAGE list */ ulint n_pages_made_young; /*!< number of pages made young */ ulint n_pages_not_made_young; /*!< number of pages not made young */ ulint n_pages_read; /*!< buf_pool->n_pages_read */ @@ -267,9 +277,10 @@ Gets the smallest oldest_modification lsn for any page in the pool. Returns zero if all modified pages have been flushed to disk. @return oldest modification in pool, zero if none */ UNIV_INTERN -ib_uint64_t +lsn_t buf_pool_get_oldest_modification(void); /*==================================*/ + /********************************************************************//** Allocates a buf_page_t descriptor. This function must succeed. In case of failure we assert in this function. 
*/ @@ -341,8 +352,7 @@ buf_page_optimistic_get( /*====================*/ ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */ buf_block_t* block, /*!< in: guessed block */ - ib_uint64_t modify_clock,/*!< in: modify clock value if mode is - ..._GUESS_ON_CLOCK */ + ib_uint64_t modify_clock,/*!< in: modify clock value */ const char* file, /*!< in: file name */ ulint line, /*!< in: line where called */ mtr_t* mtr); /*!< in: mini-transaction */ @@ -364,7 +374,7 @@ buf_page_get_known_nowait( /*******************************************************************//** Given a tablespace id and page number tries to get that page. If the page is not in the buffer pool it is not loaded and NULL is returned. -Suitable for using when holding the kernel mutex. */ +Suitable for using when holding the lock_sys_t::mutex. */ UNIV_INTERN const buf_block_t* buf_page_try_get_func( @@ -376,7 +386,7 @@ buf_page_try_get_func( mtr_t* mtr); /*!< in: mini-transaction */ /** Tries to get a page. If the page is not in the buffer pool it is -not loaded. Suitable for using when holding the kernel mutex. +not loaded. Suitable for using when holding the lock_sys_t::mutex. @param space_id in: tablespace id @param page_no in: page number @param mtr in: mini-transaction @@ -489,15 +499,6 @@ buf_page_peek( /*==========*/ ulint space, /*!< in: space id */ ulint offset);/*!< in: page number */ -/********************************************************************//** -Resets the check_index_page_at_flush field of a page if found in the buffer -pool. */ -UNIV_INTERN -void -buf_reset_check_index_page_at_flush( -/*================================*/ - ulint space, /*!< in: space id */ - ulint offset);/*!< in: page number */ #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG /********************************************************************//** Sets file_page_was_freed TRUE if the page is found in the buffer pool. @@ -570,7 +571,7 @@ Gets the youngest modification log sequence number for a frame. 
Returns zero if not file page or no modification occurred yet. @return newest modification to page */ UNIV_INLINE -ib_uint64_t +lsn_t buf_page_get_newest_modification( /*=============================*/ const buf_page_t* bpage); /*!< in: block containing the @@ -622,29 +623,6 @@ buf_block_buf_fix_inc_func( # define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b) #endif /* UNIV_SYNC_DEBUG */ /********************************************************************//** -Calculates a page checksum which is stored to the page when it is written -to a file. Note that we must be careful to calculate the same value -on 32-bit and 64-bit architectures. -@return checksum */ -UNIV_INTERN -ulint -buf_calc_page_new_checksum( -/*=======================*/ - const byte* page); /*!< in: buffer page */ -/********************************************************************//** -In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only -looked at the first few bytes of the page. This calculates that old -checksum. -NOTE: we must first store the new formula checksum to -FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum -because this takes that field as an input! -@return checksum */ -UNIV_INTERN -ulint -buf_calc_page_old_checksum( -/*=======================*/ - const byte* page); /*!< in: buffer page */ -/********************************************************************//** Checks if a page is corrupt. @return TRUE if corrupted */ UNIV_INTERN @@ -687,6 +665,17 @@ buf_pool_contains_zip( buf_pool_t* buf_pool, /*!< in: buffer pool instance */ const void* data); /*!< in: pointer to compressed page */ #endif /* UNIV_DEBUG */ + +/*********************************************************************** +FIXME_FTS: Gets the frame the pointer is pointing to. 
*/ +UNIV_INLINE +buf_frame_t* +buf_frame_align( +/*============*/ + /* out: pointer to frame */ + byte* ptr); /* in: pointer to a frame */ + + #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG /*********************************************************************//** Validates the buffer pool data structure. @@ -724,7 +713,7 @@ buf_page_print( ulint flags) /*!< in: 0 or BUF_PAGE_PRINT_NO_CRASH or BUF_PAGE_PRINT_NO_FULL */ - __attribute__((nonnull)); + UNIV_COLD __attribute__((nonnull)); /********************************************************************//** Decompress a block. @return TRUE if successful */ @@ -745,12 +734,12 @@ buf_get_latched_pages_number(void); /*==============================*/ #endif /* UNIV_DEBUG */ /*********************************************************************//** -Returns the number of pending buf pool ios. -@return number of pending I/O operations */ +Returns the number of pending buf pool read ios. +@return number of pending read I/O operations */ UNIV_INTERN ulint -buf_get_n_pending_ios(void); -/*=======================*/ +buf_get_n_pending_read_ios(void); +/*============================*/ /*********************************************************************//** Prints info of the buffer i/o. */ UNIV_INTERN @@ -758,6 +747,18 @@ void buf_print_io( /*=========*/ FILE* file); /*!< in: file where to print */ +/*******************************************************************//** +Collect buffer pool stats information for a buffer pool. 
Also +record aggregated stats if there are more than one buffer pool +in the server */ +UNIV_INTERN +void +buf_stats_get_pool_info( +/*====================*/ + buf_pool_t* buf_pool, /*!< in: buffer pool */ + ulint pool_id, /*!< in: buffer pool ID */ + buf_pool_info_t* all_pool_info); /*!< in/out: buffer pool info + to fill */ /*********************************************************************//** Returns the ratio in percents of modified pages in the buffer pool / database pages in the buffer pool. @@ -792,8 +793,8 @@ pool. @return number of pending i/o operations */ UNIV_INTERN ulint -buf_pool_check_num_pending_io(void); -/*===============================*/ +buf_pool_check_no_pending_io(void); +/*==============================*/ /*********************************************************************//** Invalidates the file pages in the buffer pool when an archive recovery is completed. All the file pages buffered must be in a replaceable state when @@ -1095,7 +1096,7 @@ buf_block_get_zip_size( Gets the compressed page descriptor corresponding to an uncompressed page if applicable. */ #define buf_block_get_page_zip(block) \ - (UNIV_LIKELY_NULL((block)->page.zip.data) ? &(block)->page.zip : NULL) + ((block)->page.zip.data ? &(block)->page.zip : NULL) #ifndef UNIV_HOTBACKUP /*******************************************************************//** Gets the block to whose frame the pointer is pointing to. 
@@ -1229,35 +1230,83 @@ UNIV_INLINE buf_page_t* buf_page_hash_get_low( /*==================*/ - buf_pool_t* buf_pool, /*!< buffer pool instance */ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: offset of the page - within space */ - ulint fold); /*!< in: buf_page_address_fold( - space, offset) */ + buf_pool_t* buf_pool,/*!< buffer pool instance */ + ulint space, /*!< in: space id */ + ulint offset, /*!< in: offset of the page within space */ + ulint fold); /*!< in: buf_page_address_fold(space, offset) */ /******************************************************************//** Returns the control block of a file page, NULL if not found. -@return block, NULL if not found or not a real control block */ +If the block is found and lock is not NULL then the appropriate +page_hash lock is acquired in the specified lock mode. Otherwise, +mode value is ignored. It is up to the caller to release the +lock. If the block is found and the lock is NULL then the page_hash +lock is released by this function. +@return block, NULL if not found */ UNIV_INLINE buf_page_t* -buf_page_hash_get( -/*==============*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ +buf_page_hash_get_locked( +/*=====================*/ + /*!< out: pointer to the bpage, + or NULL; if NULL, hash_lock + is also NULL. */ + buf_pool_t* buf_pool, /*!< buffer pool instance */ ulint space, /*!< in: space id */ - ulint offset); /*!< in: offset of the page - within space */ + ulint offset, /*!< in: page number */ + rw_lock_t** lock, /*!< in/out: lock of the page + hash acquired if bpage is + found. NULL otherwise. If NULL + is passed then the hash_lock + is released by this function */ + ulint lock_mode); /*!< in: RW_LOCK_EX or + RW_LOCK_SHARED. Ignored if + lock == NULL */ /******************************************************************//** -Returns the control block of a file page, NULL if not found -or an uncompressed page frame does not exist. 
+Returns the control block of a file page, NULL if not found. +If the block is found and lock is not NULL then the appropriate +page_hash lock is acquired in the specified lock mode. Otherwise, +mode value is ignored. It is up to the caller to release the +lock. If the block is found and the lock is NULL then the page_hash +lock is released by this function. @return block, NULL if not found */ UNIV_INLINE buf_block_t* -buf_block_hash_get( -/*===============*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ +buf_block_hash_get_locked( +/*=====================*/ + /*!< out: pointer to the bpage, + or NULL; if NULL, hash_lock + is also NULL. */ + buf_pool_t* buf_pool, /*!< buffer pool instance */ ulint space, /*!< in: space id */ - ulint offset); /*!< in: offset of the page - within space */ + ulint offset, /*!< in: page number */ + rw_lock_t** lock, /*!< in/out: lock of the page + hash acquired if bpage is + found. NULL otherwise. If NULL + is passed then the hash_lock + is released by this function */ + ulint lock_mode); /*!< in: RW_LOCK_EX or + RW_LOCK_SHARED. Ignored if + lock == NULL */ +/* There are four different ways we can try to get a bpage or block +from the page hash: +1) Caller already holds the appropriate page hash lock: in the case call +buf_page_hash_get_low() function. 
+2) Caller wants to hold page hash lock in x-mode +3) Caller wants to hold page hash lock in s-mode +4) Caller doesn't want to hold page hash lock */ +#define buf_page_hash_get_s_locked(b, s, o, l) \ + buf_page_hash_get_locked(b, s, o, l, RW_LOCK_SHARED) +#define buf_page_hash_get_x_locked(b, s, o, l) \ + buf_page_hash_get_locked(b, s, o, l, RW_LOCK_EX) +#define buf_page_hash_get(b, s, o) \ + buf_page_hash_get_locked(b, s, o, NULL, 0) + +#define buf_block_hash_get_s_locked(b, s, o, l) \ + buf_block_hash_get_locked(b, s, o, l, RW_LOCK_SHARED) +#define buf_block_hash_get_x_locked(b, s, o, l) \ + buf_block_hash_get_locked(b, s, o, l, RW_LOCK_EX) +#define buf_block_hash_get(b, s, o) \ + buf_block_hash_get_locked(b, s, o, NULL, 0) + /*********************************************************************//** Gets the current length of the free list of buffer blocks. @return length of the free list */ @@ -1324,12 +1373,25 @@ void buf_get_total_stat( /*===============*/ buf_pool_stat_t*tot_stat); /*!< out: buffer pool stats */ +/*********************************************************************//** +Get the nth chunk's buffer block in the specified buffer pool. +@return the nth chunk's buffer block. */ +UNIV_INLINE +buf_block_t* +buf_get_nth_chunk_block( +/*====================*/ + const buf_pool_t* buf_pool, /*!< in: buffer pool instance */ + ulint n, /*!< in: nth chunk in the buffer pool */ + ulint* chunk_size); /*!< in: chunk size */ #endif /* !UNIV_HOTBACKUP */ /** The common buffer control block structure for compressed and uncompressed frames */ +/** Number of bits used for buffer page states. */ +#define BUF_PAGE_STATE_BITS 3 + struct buf_page_struct{ /** @name General fields None of these bit-fields must be modified without holding @@ -1344,13 +1406,23 @@ struct buf_page_struct{ unsigned offset:32; /*!< page number; also protected by buf_pool->mutex. 
*/ - unsigned state:3; /*!< state of the control block; also + unsigned state:BUF_PAGE_STATE_BITS; + /*!< state of the control block; also protected by buf_pool->mutex. State transitions from BUF_BLOCK_READY_FOR_USE to BUF_BLOCK_MEMORY need not be protected by buf_page_get_mutex(). - @see enum buf_page_state */ + @see enum buf_page_state. + State changes that are relevant + to page_hash are additionally + protected by the appropriate + page_hash mutex i.e.: if a page + is in page_hash or is being + added to/removed from page_hash + then the corresponding changes + must also be protected by + page_hash mutex. */ #ifndef UNIV_HOTBACKUP unsigned flush_type:2; /*!< if this block is currently being flushed to disk, this tells the @@ -1432,13 +1504,13 @@ struct buf_page_struct{ should hold: in_free_list == (state == BUF_BLOCK_NOT_USED) */ #endif /* UNIV_DEBUG */ - ib_uint64_t newest_modification; + lsn_t newest_modification; /*!< log sequence number of the youngest modification to this block, zero if not modified. Protected by block mutex */ - ib_uint64_t oldest_modification; + lsn_t oldest_modification; /*!< log sequence number of the START of the log entry written of the oldest @@ -1480,8 +1552,10 @@ struct buf_page_struct{ /* @} */ # if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG ibool file_page_was_freed; - /*!< this is set to TRUE when fsp - frees a page in buffer pool */ + /*!< this is set to TRUE when + fsp frees a page in buffer pool; + protected by buf_pool->zip_mutex + or buf_block_struct::mutex. */ # endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */ #endif /* !UNIV_HOTBACKUP */ }; @@ -1575,7 +1649,7 @@ struct buf_block_struct{ - we know that buf_block_struct::buf_fix_count == 0. An exception to this is when we init or create a page - in the buffer pool in buf0buf.c. + in the buffer pool in buf0buf.cc. Another exception is that assigning block->index = NULL is allowed whenever holding an x-latch on btr_search_latch. 
*/ @@ -1701,7 +1775,14 @@ struct buf_pool_struct{ hash_table_t* page_hash; /*!< hash table of buf_page_t or buf_block_t file pages, buf_page_in_file() == TRUE, - indexed by (space_id, offset) */ + indexed by (space_id, offset). + page_hash is protected by an + array of mutexes. + Changes in page_hash are protected + by buf_pool->mutex and the relevant + page_hash mutex. Lookups can happen + while holding the buf_pool->mutex or + the relevant page_hash mutex. */ hash_table_t* zip_hash; /*!< hash table of buf_block_t blocks whose frames are allocated to the zip buddy system, @@ -1713,7 +1794,7 @@ struct buf_pool_struct{ time_t last_printout_time; /*!< when buf_print_io was last time called */ - buf_buddy_stat_t buddy_stat[BUF_BUDDY_SIZES + 1]; + buf_buddy_stat_t buddy_stat[BUF_BUDDY_SIZES_MAX + 1]; /*!< Statistics of buddy system, indexed by block size */ buf_pool_stat_t stat; /*!< current statistics */ @@ -1768,10 +1849,16 @@ struct buf_pool_struct{ to read this for heuristic purposes without holding any mutex or latch */ - ulint LRU_flush_ended;/*!< when an LRU flush ends for a page, - this is incremented by one; this is - set to zero when a buffer block is - allocated */ + ibool try_LRU_scan; /*!< Set to FALSE when an LRU + scan for free block fails. This + flag is used to avoid repeated + scans of LRU list when we know + that there is no free block + available in the scan depth for + eviction. Set to TRUE whenever + we flush a batch from the + buffer pool. Protected by the + buf_pool->mutex */ /* @} */ /** @name LRU replacement algorithm fields */ @@ -1792,7 +1879,7 @@ struct buf_pool_struct{ ulint LRU_old_len; /*!< length of the LRU list from the block to which LRU_old points onward, including that block; - see buf0lru.c for the restrictions + see buf0lru.cc for the restrictions on this value; 0 if LRU_old == NULL; NOTE: LRU_old_len must be adjusted whenever LRU_old shrinks or grows! 
*/ @@ -1811,19 +1898,16 @@ struct buf_pool_struct{ UT_LIST_BASE_NODE_T(buf_page_t) zip_clean; /*!< unmodified compressed pages */ #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ - UT_LIST_BASE_NODE_T(buf_page_t) zip_free[BUF_BUDDY_SIZES]; + UT_LIST_BASE_NODE_T(buf_page_t) zip_free[BUF_BUDDY_SIZES_MAX]; /*!< buddy free lists */ - buf_page_t watch[BUF_POOL_WATCH_SIZE]; + buf_page_t* watch; /*!< Sentinel records for buffer pool watches. Protected by - buf_pool->mutex. */ + buf_pool->mutex. */ -#if BUF_BUDDY_HIGH != UNIV_PAGE_SIZE -# error "BUF_BUDDY_HIGH != UNIV_PAGE_SIZE" -#endif -#if BUF_BUDDY_LOW > PAGE_ZIP_MIN_SIZE -# error "BUF_BUDDY_LOW > PAGE_ZIP_MIN_SIZE" +#if BUF_BUDDY_LOW > UNIV_ZIP_SIZE_MIN +# error "BUF_BUDDY_LOW > UNIV_ZIP_SIZE_MIN" #endif /* @} */ }; @@ -1854,6 +1938,47 @@ Use these instead of accessing buf_pool->mutex directly. */ +/** Get appropriate page_hash_lock. */ +# define buf_page_hash_lock_get(b, f) \ + hash_get_lock(b->page_hash, f) + +#ifdef UNIV_SYNC_DEBUG +/** Test if page_hash lock is held in s-mode. */ +# define buf_page_hash_lock_held_s(b, p) \ + rw_lock_own(buf_page_hash_lock_get(b, \ + buf_page_address_fold(p->space, \ + p->offset)), \ + RW_LOCK_SHARED) + +/** Test if page_hash lock is held in x-mode. */ +# define buf_page_hash_lock_held_x(b, p) \ + rw_lock_own(buf_page_hash_lock_get(b, \ + buf_page_address_fold(p->space, \ + p->offset)), \ + RW_LOCK_EX) + +/** Test if page_hash lock is held in x or s-mode. 
*/ +# define buf_page_hash_lock_held_s_or_x(b, p) \ + (buf_page_hash_lock_held_s(b, p) \ + || buf_page_hash_lock_held_x(b, p)) + +# define buf_block_hash_lock_held_s(b, p) \ + buf_page_hash_lock_held_s(b, &(p->page)) + +# define buf_block_hash_lock_held_x(b, p) \ + buf_page_hash_lock_held_x(b, &(p->page)) + +# define buf_block_hash_lock_held_s_or_x(b, p) \ + buf_page_hash_lock_held_s_or_x(b, &(p->page)) +#else /* UNIV_SYNC_DEBUG */ +# define buf_page_hash_lock_held_s(b, p) (TRUE) +# define buf_page_hash_lock_held_x(b, p) (TRUE) +# define buf_page_hash_lock_held_s_or_x(b, p) (TRUE) +# define buf_block_hash_lock_held_s(b, p) (TRUE) +# define buf_block_hash_lock_held_x(b, p) (TRUE) +# define buf_block_hash_lock_held_s_or_x(b, p) (TRUE) +#endif /* UNIV_SYNC_DEBUG */ + #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG /** Forbid the release of the buffer pool mutex. */ # define buf_pool_mutex_exit_forbid(b) do { \ @@ -1926,6 +2051,32 @@ FILE_PAGE => NOT_USED NOTE: This transition is allowed if and only if (3) io_fix == 0. */ +#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG +/** Functor to validate the LRU list. */ +struct CheckInLRUList { + void operator()(const buf_page_t* elem) const + { + ut_a(elem->in_LRU_list); + } +}; + +/** Functor to validate the LRU list. 
*/ +struct CheckInFreeList { + void operator()(const buf_page_t* elem) const + { + ut_a(elem->in_free_list); + } +}; + +struct CheckUnzipLRUAndLRUList { + void operator()(const buf_block_t* elem) const + { + ut_a(elem->page.in_LRU_list); + ut_a(elem->in_unzip_LRU_list); + } +}; +#endif /* UNIV_DEBUG || defined UNIV_BUF_DEBUG */ + #ifndef UNIV_NONINL #include "buf0buf.ic" #endif diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic index 917ee5dda84..88c29ab5603 100644 --- a/storage/innobase/include/buf0buf.ic +++ b/storage/innobase/include/buf0buf.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -35,6 +35,16 @@ Created 11/5/1995 Heikki Tuuri #include "buf0lru.h" #include "buf0rea.h" +#ifndef UNIV_HOTBACKUP +/** A chunk of buffers. The buffer pool is allocated in chunks. */ +struct buf_chunk_struct{ + ulint mem_size; /*!< allocated size of the chunk */ + ulint size; /*!< size of frames[] and blocks[] */ + void* mem; /*!< pointer to the memory area which + was allocated for the frames */ + buf_block_t* blocks; /*!< array of buffer control blocks */ +}; + /*********************************************************************//** Gets the current size of buffer buf_pool in bytes. @return size in bytes */ @@ -160,7 +170,7 @@ buf_page_peek_if_too_old( { buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - if (UNIV_UNLIKELY(buf_pool->freed_page_clock == 0)) { + if (buf_pool->freed_page_clock == 0) { /* If eviction has not started yet, do not update the statistics or move blocks in the LRU list. This is either the warm-up phase or an in-memory workload. 
*/ @@ -180,6 +190,7 @@ buf_page_peek_if_too_old( return(!buf_page_peek_if_young(bpage)); } } +#endif /* !UNIV_HOTBACKUP */ /*********************************************************************//** Gets the state of a block. @@ -361,11 +372,11 @@ buf_page_get_flush_type( #ifdef UNIV_DEBUG switch (flush_type) { case BUF_FLUSH_LRU: - case BUF_FLUSH_SINGLE_PAGE: case BUF_FLUSH_LIST: + case BUF_FLUSH_SINGLE_PAGE: return(flush_type); case BUF_FLUSH_N_TYPES: - break; + ut_error; } ut_error; #endif /* UNIV_DEBUG */ @@ -628,7 +639,7 @@ buf_page_get_block( /*===============*/ buf_page_t* bpage) /*!< in: control block, or NULL */ { - if (UNIV_LIKELY(bpage != NULL)) { + if (bpage != NULL) { ut_ad(buf_page_in_file(bpage)); if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) { @@ -719,6 +730,23 @@ buf_page_get_page_no( return(bpage->offset); } +/*********************************************************************** +FIXME_FTS Gets the frame the pointer is pointing to. */ +UNIV_INLINE +buf_frame_t* +buf_frame_align( +/*============*/ + /* out: pointer to frame */ + byte* ptr) /* in: pointer to a frame */ +{ + buf_frame_t* frame; + + ut_ad(ptr); + + frame = (buf_frame_t*) ut_align_down(ptr, UNIV_PAGE_SIZE); + + return(frame); +} /*********************************************************************//** Gets the page number of a block. @@ -744,7 +772,8 @@ buf_page_get_zip_size( /*==================*/ const buf_page_t* bpage) /*!< in: pointer to the control block */ { - return(bpage->zip.ssize ? 512 << bpage->zip.ssize : 0); + return(bpage->zip.ssize + ? (UNIV_ZIP_SIZE_MIN >> 1) << bpage->zip.ssize : 0); } /*********************************************************************//** @@ -756,7 +785,8 @@ buf_block_get_zip_size( /*===================*/ const buf_block_t* block) /*!< in: pointer to the control block */ { - return(block->page.zip.ssize ? 512 << block->page.zip.ssize : 0); + return(block->page.zip.ssize + ? 
(UNIV_ZIP_SIZE_MIN >> 1) << block->page.zip.ssize : 0); } #ifndef UNIV_HOTBACKUP @@ -852,7 +882,7 @@ buf_block_free( /*===========*/ buf_block_t* block) /*!< in, own: block to be freed */ { - buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block); + buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*) block); buf_pool_mutex_enter(buf_pool); @@ -905,13 +935,13 @@ Gets the youngest modification log sequence number for a frame. Returns zero if not file page or no modification occurred yet. @return newest modification to page */ UNIV_INLINE -ib_uint64_t +lsn_t buf_page_get_newest_modification( /*=============================*/ const buf_page_t* bpage) /*!< in: block containing the page frame */ { - ib_uint64_t lsn; + lsn_t lsn; mutex_t* block_mutex = buf_page_get_mutex(bpage); mutex_enter(block_mutex); @@ -938,7 +968,7 @@ buf_block_modify_clock_inc( buf_block_t* block) /*!< in: block */ { #ifdef UNIV_SYNC_DEBUG - buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block); + buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*) block); ut_ad((buf_pool_mutex_own(buf_pool) && (block->page.buf_fix_count == 0)) @@ -1047,18 +1077,24 @@ UNIV_INLINE buf_page_t* buf_page_hash_get_low( /*==================*/ - buf_pool_t* buf_pool, /*!< buffer pool instance */ - ulint space, /*!< in: space id */ - ulint offset, /*!< in: offset of the page - within space */ - ulint fold) /*!< in: buf_page_address_fold( - space, offset) */ + buf_pool_t* buf_pool,/*!< buffer pool instance */ + ulint space, /*!< in: space id */ + ulint offset, /*!< in: offset of the page within space */ + ulint fold) /*!< in: buf_page_address_fold(space, offset) */ { buf_page_t* bpage; - ut_ad(buf_pool); - ut_ad(buf_pool_mutex_own(buf_pool)); - ut_ad(fold == buf_page_address_fold(space, offset)); +#ifdef UNIV_SYNC_DEBUG + ulint hash_fold; + rw_lock_t* hash_lock; + + hash_fold = buf_page_address_fold(space, offset); + ut_ad(hash_fold == fold); + + hash_lock = hash_get_lock(buf_pool->page_hash, 
fold); + ut_ad(rw_lock_own(hash_lock, RW_LOCK_EX) + || rw_lock_own(hash_lock, RW_LOCK_SHARED)); +#endif /* UNIV_SYNC_DEBUG */ /* Look for the page in the hash table */ @@ -1083,46 +1119,145 @@ buf_page_hash_get_low( /******************************************************************//** Returns the control block of a file page, NULL if not found. -@return block, NULL if not found or not a real control block */ +If the block is found and lock is not NULL then the appropriate +page_hash lock is acquired in the specified lock mode. Otherwise, +mode value is ignored. It is up to the caller to release the +lock. If the block is found and the lock is NULL then the page_hash +lock is released by this function. +@return block, NULL if not found */ UNIV_INLINE buf_page_t* -buf_page_hash_get( -/*==============*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ +buf_page_hash_get_locked( +/*=====================*/ + /*!< out: pointer to the bpage, + or NULL; if NULL, hash_lock + is also NULL. */ + buf_pool_t* buf_pool, /*!< buffer pool instance */ ulint space, /*!< in: space id */ - ulint offset) /*!< in: offset of the page - within space */ + ulint offset, /*!< in: page number */ + rw_lock_t** lock, /*!< in/out: lock of the page + hash acquired if bpage is + found. NULL otherwise. If NULL + is passed then the hash_lock + is released by this function */ + ulint lock_mode) /*!< in: RW_LOCK_EX or + RW_LOCK_SHARED. 
Ignored if + lock == NULL */ { - buf_page_t* bpage; - ulint fold = buf_page_address_fold(space, offset); + buf_page_t* bpage = NULL; + ulint fold; + rw_lock_t* hash_lock; + ulint mode = RW_LOCK_SHARED; + + if (lock != NULL) { + *lock = NULL; + ut_ad(lock_mode == RW_LOCK_EX + || lock_mode == RW_LOCK_SHARED); + mode = lock_mode; + } + + fold = buf_page_address_fold(space, offset); + hash_lock = hash_get_lock(buf_pool->page_hash, fold); + +#ifdef UNIV_SYNC_DEBUG + ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX) + && !rw_lock_own(hash_lock, RW_LOCK_SHARED)); +#endif /* UNIV_SYNC_DEBUG */ + + if (mode == RW_LOCK_SHARED) { + rw_lock_s_lock(hash_lock); + } else { + rw_lock_x_lock(hash_lock); + } - bpage = buf_page_hash_get_low(buf_pool, space, offset, fold); + bpage = buf_page_hash_get_low(buf_pool, space, offset, fold); - if (bpage && buf_pool_watch_is_sentinel(buf_pool, bpage)) { + if (!bpage || buf_pool_watch_is_sentinel(buf_pool, bpage)) { bpage = NULL; + goto unlock_and_exit; } + ut_ad(buf_page_in_file(bpage)); + ut_ad(offset == bpage->offset); + ut_ad(space == bpage->space); + + if (lock == NULL) { + /* The caller wants us to release the page_hash lock */ + goto unlock_and_exit; + } else { + /* To be released by the caller */ + *lock = hash_lock; + goto exit; + } + +unlock_and_exit: + if (mode == RW_LOCK_SHARED) { + rw_lock_s_unlock(hash_lock); + } else { + rw_lock_x_unlock(hash_lock); + } +exit: return(bpage); } /******************************************************************//** -Returns the control block of a file page, NULL if not found -or an uncompressed page frame does not exist. +Returns the control block of a file page, NULL if not found. +If the block is found and lock is not NULL then the appropriate +page_hash lock is acquired in the specified lock mode. Otherwise, +mode value is ignored. It is up to the caller to release the +lock. If the block is found and the lock is NULL then the page_hash +lock is released by this function. 
@return block, NULL if not found */ UNIV_INLINE buf_block_t* -buf_block_hash_get( -/*===============*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ +buf_block_hash_get_locked( +/*=====================*/ + /*!< out: pointer to the bpage, + or NULL; if NULL, hash_lock + is also NULL. */ + buf_pool_t* buf_pool, /*!< buffer pool instance */ ulint space, /*!< in: space id */ - ulint offset) /*!< in: offset of the page - within space */ + ulint offset, /*!< in: page number */ + rw_lock_t** lock, /*!< in/out: lock of the page + hash acquired if bpage is + found. NULL otherwise. If NULL + is passed then the hash_lock + is released by this function */ + ulint lock_mode) /*!< in: RW_LOCK_EX or + RW_LOCK_SHARED. Ignored if + lock == NULL */ { - buf_block_t* block; + buf_page_t* bpage = buf_page_hash_get_locked(buf_pool, + space, + offset, + lock, + lock_mode); + buf_block_t* block = buf_page_get_block(bpage); + + if (block) { + ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); +#ifdef UNIV_SYNC_DEBUG + ut_ad(!lock || rw_lock_own(*lock, lock_mode)); +#endif /* UNIV_SYNC_DEBUG */ + return(block); + } else if (bpage) { + /* It is not a block. 
Just a bpage */ + ut_ad(buf_page_in_file(bpage)); - block = buf_page_get_block(buf_page_hash_get(buf_pool, space, offset)); + if (lock) { + if (lock_mode == RW_LOCK_SHARED) { + rw_lock_s_unlock(*lock); + } else { + rw_lock_x_unlock(*lock); + } + } + *lock = NULL; + return(NULL); + } - return(block); + ut_ad(!bpage); + ut_ad(lock == NULL ||*lock == NULL); + return(NULL); } /********************************************************************//** @@ -1139,16 +1274,9 @@ buf_page_peek( ulint space, /*!< in: space id */ ulint offset) /*!< in: page number */ { - const buf_page_t* bpage; buf_pool_t* buf_pool = buf_pool_get(space, offset); - buf_pool_mutex_enter(buf_pool); - - bpage = buf_page_hash_get(buf_pool, space, offset); - - buf_pool_mutex_exit(buf_pool); - - return(bpage != NULL); + return(buf_page_hash_get(buf_pool, space, offset) != NULL); } /********************************************************************//** @@ -1189,7 +1317,6 @@ buf_page_release_zip( break; } - ut_error; } @@ -1274,4 +1401,21 @@ buf_pool_mutex_exit_all(void) buf_pool_mutex_exit(buf_pool); } } +/*********************************************************************//** +Get the nth chunk's buffer block in the specified buffer pool. +@return the nth chunk's buffer block. 
*/ +UNIV_INLINE +buf_block_t* +buf_get_nth_chunk_block( +/*====================*/ + const buf_pool_t* buf_pool, /*!< in: buffer pool instance */ + ulint n, /*!< in: nth chunk in the buffer pool */ + ulint* chunk_size) /*!< in: chunk size */ +{ + const buf_chunk_t* chunk; + + chunk = buf_pool->chunks + n; + *chunk_size = chunk->size; + return(chunk->blocks); +} #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/include/buf0checksum.h b/storage/innobase/include/buf0checksum.h new file mode 100644 index 00000000000..cd21781dc6e --- /dev/null +++ b/storage/innobase/include/buf0checksum.h @@ -0,0 +1,88 @@ +/***************************************************************************** + +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file buf/buf0checksum.h +Buffer pool checksum functions, also linked from /extra/innochecksum.cc + +Created Aug 11, 2011 Vasil Dimov +*******************************************************/ + +#ifndef buf0checksum_h +#define buf0checksum_h + +#include "univ.i" + +#ifndef UNIV_INNOCHECKSUM + +#include "buf0types.h" + +#endif /* !UNIV_INNOCHECKSUM */ + +/********************************************************************//** +Calculates a page CRC32 which is stored to the page when it is written +to a file. Note that we must be careful to calculate the same value on +32-bit and 64-bit architectures. +@return checksum */ +UNIV_INTERN +ib_uint32_t +buf_calc_page_crc32( +/*================*/ + const byte* page); /*!< in: buffer page */ + +/********************************************************************//** +Calculates a page checksum which is stored to the page when it is written +to a file. Note that we must be careful to calculate the same value on +32-bit and 64-bit architectures. +@return checksum */ +UNIV_INTERN +ulint +buf_calc_page_new_checksum( +/*=======================*/ + const byte* page); /*!< in: buffer page */ + +/********************************************************************//** +In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only +looked at the first few bytes of the page. This calculates that old +checksum. +NOTE: we must first store the new formula checksum to +FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum +because this takes that field as an input! 
+@return checksum */ +UNIV_INTERN +ulint +buf_calc_page_old_checksum( +/*=======================*/ + const byte* page); /*!< in: buffer page */ + +#ifndef UNIV_INNOCHECKSUM + +/********************************************************************//** +Return a printable string describing the checksum algorithm. +@return algorithm name */ +UNIV_INTERN +const char* +buf_checksum_algorithm_name( +/*========================*/ + srv_checksum_algorithm_t algo); /*!< in: algorithm */ + +extern ulong srv_checksum_algorithm; + +#endif /* !UNIV_INNOCHECKSUM */ + +#endif /* buf0checksum_h */ diff --git a/storage/innobase/include/buf0dblwr.h b/storage/innobase/include/buf0dblwr.h new file mode 100644 index 00000000000..fcc56d91405 --- /dev/null +++ b/storage/innobase/include/buf0dblwr.h @@ -0,0 +1,148 @@ +/***************************************************************************** + +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file include/buf0dblwr.h +Doublewrite buffer module + +Created 2011/12/19 Inaam Rana +*******************************************************/ + +#ifndef buf0dblwr_h +#define buf0dblwr_h + +#include "univ.i" +#include "ut0byte.h" +#include "log0log.h" +#include "buf0types.h" + +#ifndef UNIV_HOTBACKUP + +/** Doublewrite system */ +extern buf_dblwr_t* buf_dblwr; +/** Set to TRUE when the doublewrite buffer is being created */ +extern ibool buf_dblwr_being_created; + +/****************************************************************//** +Creates the doublewrite buffer to a new InnoDB installation. The header of the +doublewrite buffer is placed on the trx system header page. */ +UNIV_INTERN +void +buf_dblwr_create(void); +/*==================*/ +/****************************************************************//** +At a database startup initializes the doublewrite buffer memory structure if +we already have a doublewrite buffer created in the data files. If we are +upgrading to an InnoDB version which supports multiple tablespaces, then this +function performs the necessary update operations. If we are in a crash +recovery, this function uses a possible doublewrite buffer to restore +half-written pages in the data files. */ +UNIV_INTERN +void +buf_dblwr_init_or_restore_pages( +/*============================*/ + ibool restore_corrupt_pages); /*!< in: TRUE=restore pages */ +/****************************************************************//** +frees doublewrite buffer. 
*/ +UNIV_INTERN +void +buf_dblwr_free(void); +/*================*/ +/********************************************************************//** +Updates the doublewrite buffer when an IO request that is part of an +LRU or flush batch is completed. */ +UNIV_INTERN +void +buf_dblwr_update(void); +/*==================*/ +/****************************************************************//** +Determines if a page number is located inside the doublewrite buffer. +@return TRUE if the location is inside the two blocks of the +doublewrite buffer */ +UNIV_INTERN +ibool +buf_dblwr_page_inside( +/*==================*/ + ulint page_no); /*!< in: page number */ +/********************************************************************//** +Posts a buffer page for writing. If the doublewrite memory buffer is +full, calls buf_dblwr_flush_buffered_writes and waits for for free +space to appear. */ +UNIV_INTERN +void +buf_dblwr_add_to_batch( +/*====================*/ + buf_page_t* bpage); /*!< in: buffer block to write */ +/********************************************************************//** +Flushes possible buffered writes from the doublewrite memory buffer to disk, +and also wakes up the aio thread if simulated aio is used. It is very +important to call this function after a batch of writes has been posted, +and also when we may have to wait for a page latch! Otherwise a deadlock +of threads can occur. */ +UNIV_INTERN +void +buf_dblwr_flush_buffered_writes(void); +/*=================================*/ +/********************************************************************//** +Writes a page to the doublewrite buffer on disk, sync it, then write +the page to the datafile and sync the datafile. This function is used +for single page flushes. If all the buffers allocated for single page +flushes in the doublewrite buffer are in use we wait here for one to +become free. 
We are guaranteed that a slot will become free because any +thread that is using a slot must also release the slot before leaving +this function. */ +UNIV_INTERN +void +buf_dblwr_write_single_page( +/*========================*/ + buf_page_t* bpage); /*!< in: buffer block to write */ + +/** Doublewrite control struct */ +struct buf_dblwr_struct{ + mutex_t mutex; /*!< mutex protecting the first_free field and + write_buf */ + ulint block1; /*!< the page number of the first + doublewrite block (64 pages) */ + ulint block2; /*!< page number of the second block */ + ulint first_free; /*!< first free position in write_buf measured + in units of UNIV_PAGE_SIZE */ + ulint s_reserved; /*!< number of slots currently reserved + for single page flushes. */ + ulint b_reserved; /*!< number of slots currently reserved + for batch flush. */ + ibool* in_use; /*!< flag used to indicate if a slot is + in use. Only used for single page + flushes. */ + ibool batch_running; /*!< set to TRUE if currently a batch + is being written from the doublewrite + buffer. */ + byte* write_buf; /*!< write buffer used in writing to the + doublewrite buffer, aligned to an + address divisible by UNIV_PAGE_SIZE + (which is required by Windows aio) */ + byte* write_buf_unaligned; + /*!< pointer to write_buf, but unaligned */ + buf_page_t** + buf_block_arr; /*!< array to store pointers to the buffer + blocks which have been cached to write_buf */ +}; + + +#endif /* UNIV_HOTBACKUP */ + +#endif diff --git a/storage/innobase/include/buf0dump.h b/storage/innobase/include/buf0dump.h new file mode 100644 index 00000000000..c704a8e97e0 --- /dev/null +++ b/storage/innobase/include/buf0dump.h @@ -0,0 +1,72 @@ +/***************************************************************************** + +Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file buf/buf0dump.h +Implements a buffer pool dump/load. + +Created April 08, 2011 Vasil Dimov +*******************************************************/ + +#ifndef buf0dump_h +#define buf0dump_h + +#include "univ.i" + +/*****************************************************************//** +Wakes up the buffer pool dump/load thread and instructs it to start +a dump. This function is called by MySQL code via buffer_pool_dump_now() +and it should return immediately because the whole MySQL is frozen during +its execution. */ +UNIV_INTERN +void +buf_dump_start(); +/*============*/ + +/*****************************************************************//** +Wakes up the buffer pool dump/load thread and instructs it to start +a load. This function is called by MySQL code via buffer_pool_load_now() +and it should return immediately because the whole MySQL is frozen during +its execution. */ +UNIV_INTERN +void +buf_load_start(); +/*============*/ + +/*****************************************************************//** +Aborts a currently running buffer pool load. 
This function is called by +MySQL code via buffer_pool_load_abort() and it should return immediately +because the whole MySQL is frozen during its execution. */ +UNIV_INTERN +void +buf_load_abort(); +/*============*/ + +/*****************************************************************//** +This is the main thread for buffer pool dump/load. It waits for an +event and when waked up either performs a dump or load and sleeps +again. +@return this function does not return, it calls os_thread_exit() */ +extern "C" UNIV_INTERN +os_thread_ret_t +DECLARE_THREAD(buf_dump_thread)( +/*============================*/ + void* arg); /*!< in: a dummy parameter + required by os_thread_create */ + +#endif /* buf0dump_h */ diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h index ae27f5dab0e..faf577f718b 100644 --- a/storage/innobase/include/buf0flu.h +++ b/storage/innobase/include/buf0flu.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -28,10 +28,13 @@ Created 11/5/1995 Heikki Tuuri #include "univ.i" #include "ut0byte.h" +#include "log0log.h" #ifndef UNIV_HOTBACKUP #include "mtr0types.h" #include "buf0types.h" -#include "log0log.h" + +/** Flag indicating if the page_cleaner is in active state. */ +extern ibool buf_page_cleaner_is_active; /********************************************************************//** Remove a block from the flush list of modified blocks. */ @@ -57,21 +60,6 @@ void buf_flush_write_complete( /*=====================*/ buf_page_t* bpage); /*!< in: pointer to the block in question */ -/*********************************************************************//** -Flushes pages from the end of the LRU list if there is too small -a margin of replaceable pages there. If buffer pool is NULL it -means flush free margin on all buffer pool instances. */ -UNIV_INTERN -void -buf_flush_free_margin( -/*==================*/ - buf_pool_t* buf_pool); -/*********************************************************************//** -Flushes pages from the end of all the LRU lists. */ -UNIV_INTERN -void -buf_flush_free_margins(void); -/*=========================*/ #endif /* !UNIV_HOTBACKUP */ /********************************************************************//** Initializes a page for writing to the tablespace. 
*/ @@ -79,10 +67,10 @@ UNIV_INTERN void buf_flush_init_for_writing( /*=======================*/ - byte* page, /*!< in/out: page */ - void* page_zip_, /*!< in/out: compressed page, or NULL */ - ib_uint64_t newest_lsn); /*!< in: newest modification lsn - to the page */ + byte* page, /*!< in/out: page */ + void* page_zip_, /*!< in/out: compressed page, or NULL */ + lsn_t newest_lsn); /*!< in: newest modification lsn + to the page */ #ifndef UNIV_HOTBACKUP # if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG /********************************************************************//** @@ -99,21 +87,13 @@ buf_flush_page_try( buf_block_t* block) /*!< in/out: buffer control block */ __attribute__((nonnull, warn_unused_result)); # endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ -/*******************************************************************//** -This utility flushes dirty blocks from the end of the LRU list. -NOTE: The calling thread may own latches to pages: to avoid deadlocks, -this function must be written so that it cannot end up waiting for these -latches! -@return number of blocks for which the write request was queued; -ULINT_UNDEFINED if there was a flush of the same type already running */ +/********************************************************************//** +Flush a batch of writes to the datafiles that have already been +written by the OS. */ UNIV_INTERN -ulint -buf_flush_LRU( -/*==========*/ - buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - ulint min_n); /*!< in: wished minimum mumber of blocks - flushed (it is not guaranteed that the - actual number is that big, though) */ +void +buf_flush_sync_datafiles(void); +/*==========================*/ /*******************************************************************//** This utility flushes dirty blocks from the end of the flush_list of all buffer pool instances. 
@@ -127,12 +107,25 @@ buf_flush_list( ulint min_n, /*!< in: wished minimum mumber of blocks flushed (it is not guaranteed that the actual number is that big, though) */ - ib_uint64_t lsn_limit); /*!< in the case BUF_FLUSH_LIST all + lsn_t lsn_limit); /*!< in the case BUF_FLUSH_LIST all blocks whose oldest_modification is smaller than this should be flushed (if their number does not exceed min_n), otherwise ignored */ /******************************************************************//** +This function picks up a single dirty page from the tail of the LRU +list, flushes it, removes it from page_hash and LRU list and puts +it on the free list. It is called from user threads when they are +unable to find a replacable page at the tail of the LRU list i.e.: +when the background LRU flushing in the page_cleaner thread is not +fast enough to keep pace with the workload. +@return TRUE if success. */ +UNIV_INTERN +ibool +buf_flush_single_page_from_LRU( +/*===========================*/ + buf_pool_t* buf_pool); /*!< in/out: buffer pool instance */ +/******************************************************************//** Waits until a flush batch of the given type ends */ UNIV_INTERN void @@ -169,9 +162,9 @@ void buf_flush_recv_note_modification( /*=============================*/ buf_block_t* block, /*!< in: block which is modified */ - ib_uint64_t start_lsn, /*!< in: start lsn of the first mtr in a + lsn_t start_lsn, /*!< in: start lsn of the first mtr in a set of mtr's */ - ib_uint64_t end_lsn); /*!< in: end lsn of the last mtr in the + lsn_t end_lsn); /*!< in: end lsn of the last mtr in the set of mtr's */ /********************************************************************//** Returns TRUE if the file page block is immediately suitable for replacement, @@ -195,8 +188,8 @@ how much redo the workload is generating and at what rate. */ struct buf_flush_stat_struct { - ib_uint64_t redo; /**< amount of redo generated. */ - ulint n_flushed; /**< number of pages flushed. 
*/ + lsn_t redo; /**< amount of redo generated. */ + ulint n_flushed; /**< number of pages flushed. */ }; /** Statistics for selecting flush rate of dirty pages. */ @@ -208,18 +201,16 @@ UNIV_INTERN void buf_flush_stat_update(void); /*=======================*/ -/********************************************************************* -Determines the fraction of dirty pages that need to be flushed based -on the speed at which we generate redo log. Note that if redo log -is generated at significant rate without a corresponding increase -in the number of dirty pages (for example, an in-memory workload) -it can cause IO bursts of flushing. This function implements heuristics -to avoid this burstiness. -@return number of dirty pages to be flushed / second */ -UNIV_INTERN -ulint -buf_flush_get_desired_flush_rate(void); -/*==================================*/ +/******************************************************************//** +page_cleaner thread tasked with flushing dirty pages from the buffer +pools. As of now we'll have only one instance of this thread. +@return a dummy parameter */ +extern "C" UNIV_INTERN +os_thread_ret_t +DECLARE_THREAD(buf_flush_page_cleaner_thread)( +/*==========================================*/ + void* arg); /*!< in: a dummy parameter required by + os_thread_create */ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG /******************************************************************//** @@ -247,15 +238,6 @@ UNIV_INTERN void buf_flush_free_flush_rbt(void); /*==========================*/ - -/** When buf_flush_free_margin is called, it tries to make this many blocks -available to replacement in the free list and at the end of the LRU list (to -make sure that a read-ahead batch can be read efficiently in a single -sweep). 
*/ -#define BUF_FLUSH_FREE_BLOCK_MARGIN(b) (5 + BUF_READ_AHEAD_AREA(b)) -/** Extra margin to apply above BUF_FLUSH_FREE_BLOCK_MARGIN */ -#define BUF_FLUSH_EXTRA_MARGIN(b) ((BUF_FLUSH_FREE_BLOCK_MARGIN(b) / 4 \ - + 100) / srv_buf_pool_instances) #endif /* !UNIV_HOTBACKUP */ #ifndef UNIV_NONINL diff --git a/storage/innobase/include/buf0flu.ic b/storage/innobase/include/buf0flu.ic index 30e2cc8efe8..68a76c0b637 100644 --- a/storage/innobase/include/buf0flu.ic +++ b/storage/innobase/include/buf0flu.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -35,7 +35,7 @@ buf_flush_insert_into_flush_list( /*=============================*/ buf_pool_t* buf_pool, /*!< buffer pool instance */ buf_block_t* block, /*!< in/out: block which is modified */ - ib_uint64_t lsn); /*!< in: oldest modification */ + lsn_t lsn); /*!< in: oldest modification */ /********************************************************************//** Inserts a modified block into the flush list in the right sorted position. 
This function is used by recovery, because there the modifications do not @@ -46,7 +46,7 @@ buf_flush_insert_sorted_into_flush_list( /*====================================*/ buf_pool_t* buf_pool, /*!< buffer pool instance */ buf_block_t* block, /*!< in/out: block which is modified */ - ib_uint64_t lsn); /*!< in: oldest modification */ + lsn_t lsn); /*!< in: oldest modification */ /********************************************************************//** This function should be called at a mini-transaction commit, if a page was @@ -70,7 +70,7 @@ buf_flush_note_modification( ut_ad(!buf_pool_mutex_own(buf_pool)); ut_ad(!buf_flush_list_mutex_own(buf_pool)); - ut_ad(log_flush_order_mutex_own()); + ut_ad(!mtr->made_dirty || log_flush_order_mutex_own()); ut_ad(mtr->start_lsn != 0); ut_ad(mtr->modifications); @@ -81,6 +81,8 @@ buf_flush_note_modification( block->page.newest_modification = mtr->end_lsn; if (!block->page.oldest_modification) { + ut_a(mtr->made_dirty); + ut_ad(log_flush_order_mutex_own()); buf_flush_insert_into_flush_list( buf_pool, block, mtr->start_lsn); } else { @@ -99,9 +101,9 @@ void buf_flush_recv_note_modification( /*=============================*/ buf_block_t* block, /*!< in: block which is modified */ - ib_uint64_t start_lsn, /*!< in: start lsn of the first mtr in a + lsn_t start_lsn, /*!< in: start lsn of the first mtr in a set of mtr's */ - ib_uint64_t end_lsn) /*!< in: end lsn of the last mtr in the + lsn_t end_lsn) /*!< in: end lsn of the last mtr in the set of mtr's */ { buf_pool_t* buf_pool = buf_pool_from_block(block); diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h index 9ecb9de2afe..527852da758 100644 --- a/storage/innobase/include/buf0lru.h +++ b/storage/innobase/include/buf0lru.h @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -27,23 +27,11 @@ Created 11/5/1995 Heikki Tuuri #define buf0lru_h #include "univ.i" +#ifndef UNIV_HOTBACKUP #include "ut0byte.h" #include "buf0types.h" /******************************************************************//** -Tries to remove LRU flushed blocks from the end of the LRU list and put them -to the free list. This is beneficial for the efficiency of the insert buffer -operation, as flushed pages from non-unique non-clustered indexes are here -taken out of the buffer pool, and their inserts redirected to the insert -buffer. Otherwise, the flushed blocks could get modified again before read -operations need new buffer blocks, and the i/o work done in flushing would be -wasted. */ -UNIV_INTERN -void -buf_LRU_try_free_flushed_blocks( -/*============================*/ - buf_pool_t* buf_pool); /*!< in: buffer pool instance */ -/******************************************************************//** Returns TRUE if less than 25 % of the buffer pool is available. This can be used in heuristics to prevent huge transactions eating up the whole buffer pool for their locks. @@ -60,18 +48,16 @@ These are low-level functions /** Minimum LRU list length for which the LRU_old pointer is defined */ #define BUF_LRU_OLD_MIN_LEN 512 /* 8 megabytes of 16k pages */ -/** Maximum LRU list search length in buf_flush_LRU_recommendation() */ -#define BUF_LRU_FREE_SEARCH_LEN(b) (5 + 2 * BUF_READ_AHEAD_AREA(b)) - /******************************************************************//** -Removes all pages belonging to a given tablespace. 
*/ +Invalidates all pages belonging to a given tablespace when we are deleting +the data file(s) of that tablespace. A PROBLEM: if readahead is being started, +what guarantees that it will not try to read in pages after this operation has +completed? */ UNIV_INTERN void -buf_LRU_flush_or_remove_pages( +buf_LRU_invalidate_tablespace( /*==========================*/ - ulint id, /*!< in: space id */ - enum buf_remove_t buf_remove);/*!< in: remove or flush - strategy */ + ulint id); /*!< in: space id */ #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG /********************************************************************//** Insert a compressed block into buf_pool->zip_clean in the LRU order. */ @@ -90,9 +76,8 @@ NOTE: If this function returns TRUE, it will temporarily release buf_pool->mutex. Furthermore, the page frame will no longer be accessible via bpage. -The caller must hold buf_pool->mutex and buf_page_get_mutex(bpage) and -release these two mutexes after the call. No other -buf_page_get_mutex() may be held when calling this function. +The caller must hold buf_pool->mutex and must not hold any +buf_page_get_mutex() when calling this function. @return TRUE if freed, FALSE otherwise. */ UNIV_INTERN ibool @@ -107,19 +92,13 @@ Try to free a replaceable block. @return TRUE if found and freed */ UNIV_INTERN ibool -buf_LRU_search_and_free_block( -/*==========================*/ +buf_LRU_scan_and_free_block( +/*========================*/ buf_pool_t* buf_pool, /*!< in: buffer pool instance */ - ulint n_iterations); /*!< in: how many times this has - been called repeatedly without - result: a high value means that - we should search farther; if - n_iterations < 10, then we search - n_iterations / 10 * buf_pool->curr_size - pages from the end of the LRU list; if - n_iterations < 5, then we will - also search n_iterations / 5 - of the unzip_LRU list. */ + ibool scan_all) /*!< in: scan whole LRU list + if TRUE, otherwise scan only + 'old' blocks. 
*/ + __attribute__((nonnull,warn_unused_result)); /******************************************************************//** Returns a free block from the buf_pool. The block is taken off the free list. If it is empty, returns NULL. @@ -133,6 +112,27 @@ buf_LRU_get_free_only( Returns a free block from the buf_pool. The block is taken off the free list. If it is empty, blocks are moved from the end of the LRU list to the free list. +This function is called from a user thread when it needs a clean +block to read in a page. Note that we only ever get a block from +the free list. Even when we flush a page or find a page in LRU scan +we put it to free list to be used. +* iteration 0: + * get a block from free list, success:done + * if there is an LRU flush batch in progress: + * wait for batch to end: retry free list + * if buf_pool->try_LRU_scan is set + * scan LRU up to srv_LRU_scan_depth to find a clean block + * the above will put the block on free list + * success:retry the free list + * flush one dirty page from tail of LRU to disk + * the above will put the block on free list + * success: retry the free list +* iteration 1: + * same as iteration 0 except: + * scan whole LRU list + * scan LRU list even if buf_pool->try_LRU_scan is not set +* iteration > 1: + * same as iteration 1 but sleep 100ms @return the free control block, in state BUF_BLOCK_READY_FOR_USE */ UNIV_INTERN buf_block_t* @@ -140,7 +140,15 @@ buf_LRU_get_free_block( /*===================*/ buf_pool_t* buf_pool) /*!< in/out: buffer pool instance */ __attribute__((nonnull,warn_unused_result)); - +/******************************************************************//** +Determines if the unzip_LRU list should be used for evicting a victim +instead of the general LRU list. 
+@return TRUE if should use unzip_LRU */ +UNIV_INTERN +ibool +buf_LRU_evict_from_unzip_LRU( +/*=========================*/ + buf_pool_t* buf_pool); /******************************************************************//** Puts a block back to the free list. */ UNIV_INTERN @@ -290,4 +298,6 @@ Increments the page_zip_decompress() counter in buf_LRU_stat_cur. */ #include "buf0lru.ic" #endif +#endif /* !UNIV_HOTBACKUP */ + #endif diff --git a/storage/innobase/include/buf0lru.ic b/storage/innobase/include/buf0lru.ic index 556f45d987f..6e0da7a2588 100644 --- a/storage/innobase/include/buf0lru.ic +++ b/storage/innobase/include/buf0lru.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h index cd5eff66ee8..b98ff121209 100644 --- a/storage/innobase/include/buf0rea.h +++ b/storage/innobase/include/buf0rea.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. 
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -43,6 +43,18 @@ buf_read_page( ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ ulint offset);/*!< in: page number */ /********************************************************************//** +High-level function which reads a page asynchronously from a file to the +buffer buf_pool if it is not already there. Sets the io_fix flag and sets +an exclusive lock on the buffer frame. The flag is cleared and the x-lock +released by the i/o-handler thread. +@return TRUE if page has been read in, FALSE in case of failure */ +UNIV_INTERN +ibool +buf_read_page_async( +/*================*/ + ulint space, /*!< in: space id */ + ulint offset);/*!< in: page number */ +/********************************************************************//** Applies a random read-ahead in buf_pool if there are at least a threshold value of accessed pages from the random read-ahead area. 
Does not read any page, not even the one at the position (space, offset), if the read-ahead @@ -157,6 +169,9 @@ invoked */ #define BUF_READ_IBUF_PAGES_ONLY 131 /** read any page */ #define BUF_READ_ANY_PAGE 132 +/** read any page, but ignore (return an error) if a page does not exist +instead of crashing like BUF_READ_ANY_PAGE does */ +#define BUF_READ_IGNORE_NONEXISTENT_PAGES 1024 /* @} */ #endif diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h index 2916f39f3fe..ba54a8aeeea 100644 --- a/storage/innobase/include/buf0types.h +++ b/storage/innobase/include/buf0types.h @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -26,8 +26,6 @@ Created 11/17/1995 Heikki Tuuri #ifndef buf0types_h #define buf0types_h -#include "page0types.h" - /** Buffer page (uncompressed or compressed) */ typedef struct buf_page_struct buf_page_t; /** Buffer block for which an uncompressed page exists */ @@ -40,6 +38,8 @@ typedef struct buf_pool_struct buf_pool_t; typedef struct buf_pool_stat_struct buf_pool_stat_t; /** Buffer pool buddy statistics struct */ typedef struct buf_buddy_stat_struct buf_buddy_stat_t; +/** Doublewrite memory struct */ +typedef struct buf_dblwr_struct buf_dblwr_t; /** A buffer frame. 
@see page_t */ typedef byte buf_frame_t; @@ -47,9 +47,10 @@ typedef byte buf_frame_t; /** Flags for flush types */ enum buf_flush { BUF_FLUSH_LRU = 0, /*!< flush via the LRU list */ - BUF_FLUSH_SINGLE_PAGE, /*!< flush a single page */ BUF_FLUSH_LIST, /*!< flush via the flush list of dirty blocks */ + BUF_FLUSH_SINGLE_PAGE, /*!< flush via the LRU list + but only a single page */ BUF_FLUSH_N_TYPES /*!< index of last element + 1 */ }; @@ -63,23 +64,39 @@ enum buf_io_fix { the flush_list */ }; -/** Algorithm to remove the pages for a tablespace from the buffer pool. -@See buf_LRU_flush_or_remove_pages(). */ -enum buf_remove_t { - BUF_REMOVE_ALL_NO_WRITE, /*!< Remove all pages from the buffer - pool, don't write or sync to disk */ - BUF_REMOVE_FLUSH_NO_WRITE /*!< Remove only, from the flush list, - don't write or sync to disk */ +/** Alternatives for srv_checksum_algorithm, which can be changed by +setting innodb_checksum_algorithm */ +enum srv_checksum_algorithm_enum { + SRV_CHECKSUM_ALGORITHM_CRC32, /*!< Write crc32, allow crc32, + innodb or none when reading */ + SRV_CHECKSUM_ALGORITHM_STRICT_CRC32, /*!< Write crc32, allow crc32 + when reading */ + SRV_CHECKSUM_ALGORITHM_INNODB, /*!< Write innodb, allow crc32, + innodb or none when reading */ + SRV_CHECKSUM_ALGORITHM_STRICT_INNODB, /*!< Write innodb, allow + innodb when reading */ + SRV_CHECKSUM_ALGORITHM_NONE, /*!< Write none, allow crc32, + innodb or none when reading */ + SRV_CHECKSUM_ALGORITHM_STRICT_NONE /*!< Write none, allow none + when reading */ }; +typedef enum srv_checksum_algorithm_enum srv_checksum_algorithm_t; + /** Parameters of binary buddy system for compressed pages (buf0buddy.h) */ /* @{ */ -#define BUF_BUDDY_LOW_SHIFT PAGE_ZIP_MIN_SIZE_SHIFT +/** Zip shift value for the smallest page size */ +#define BUF_BUDDY_LOW_SHIFT UNIV_ZIP_SIZE_SHIFT_MIN -#define BUF_BUDDY_LOW (1 << BUF_BUDDY_LOW_SHIFT) +/** Smallest buddy page size */ +#define BUF_BUDDY_LOW (1U << BUF_BUDDY_LOW_SHIFT) +/** Actual number of 
buddy sizes based on current page size */ #define BUF_BUDDY_SIZES (UNIV_PAGE_SIZE_SHIFT - BUF_BUDDY_LOW_SHIFT) - /*!< number of buddy sizes */ + +/** Maximum number of buddy sizes based on the max page size */ +#define BUF_BUDDY_SIZES_MAX (UNIV_PAGE_SIZE_SHIFT_MAX \ + - BUF_BUDDY_LOW_SHIFT) /** twice the maximum block size of the buddy system; the underlying memory is aligned by this amount: @@ -87,5 +104,4 @@ this must be equal to UNIV_PAGE_SIZE */ #define BUF_BUDDY_HIGH (BUF_BUDDY_LOW << BUF_BUDDY_SIZES) /* @} */ -#endif - +#endif /* buf0types.h */ diff --git a/storage/innobase/include/data0data.h b/storage/innobase/include/data0data.h index 6d3c2988fdc..37364e891f5 100644 --- a/storage/innobase/include/data0data.h +++ b/storage/innobase/include/data0data.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -153,6 +153,7 @@ dfield_dup( /*=======*/ dfield_t* field, /*!< in/out: data field */ mem_heap_t* heap); /*!< in: memory heap where allocated */ +#ifndef UNIV_HOTBACKUP /*********************************************************************//** Tests if two data fields are equal. If len==0, tests the data length and content for equality. @@ -170,13 +171,15 @@ dfield_datas_are_binary_equal( /*********************************************************************//** Tests if dfield data length and content is equal to the given. @return TRUE if equal */ -UNIV_INTERN +UNIV_INLINE ibool dfield_data_is_binary_equal( /*========================*/ const dfield_t* field, /*!< in: field */ ulint len, /*!< in: data length or UNIV_SQL_NULL */ - const byte* data); /*!< in: data */ + const byte* data) /*!< in: data */ + __attribute__((nonnull, warn_unused_result)); +#endif /* !UNIV_HOTBACKUP */ /*********************************************************************//** Gets number of fields in a data tuple. @return number of fields */ diff --git a/storage/innobase/include/data0data.ic b/storage/innobase/include/data0data.ic index 205fa397987..da50e91e98d 100644 --- a/storage/innobase/include/data0data.ic +++ b/storage/innobase/include/data0data.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -138,7 +138,7 @@ dfield_is_ext( { ut_ad(field); - return(UNIV_UNLIKELY(field->ext)); + return(field->ext); } /*********************************************************************//** @@ -228,6 +228,7 @@ dfield_dup( } } +#ifndef UNIV_HOTBACKUP /*********************************************************************//** Tests if two data fields are equal. If len==0, tests the data length and content for equality. @@ -258,6 +259,23 @@ dfield_datas_are_binary_equal( } /*********************************************************************//** +Tests if dfield data length and content is equal to the given. +@return TRUE if equal */ +UNIV_INLINE +ibool +dfield_data_is_binary_equal( +/*========================*/ + const dfield_t* field, /*!< in: field */ + ulint len, /*!< in: data length or UNIV_SQL_NULL */ + const byte* data) /*!< in: data */ +{ + return(len == dfield_get_len(field) + && (len == UNIV_SQL_NULL + || !memcmp(dfield_get_data(field), data, len))); +} +#endif /* !UNIV_HOTBACKUP */ + +/*********************************************************************//** Gets info bits in a data tuple. 
@return info bits */ UNIV_INLINE diff --git a/storage/innobase/include/data0type.h b/storage/innobase/include/data0type.h index d7fa0b9cd44..c7fcf316f24 100644 --- a/storage/innobase/include/data0type.h +++ b/storage/innobase/include/data0type.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -35,6 +35,16 @@ extern ulint data_mysql_default_charset_coll; /* SQL data type struct */ typedef struct dtype_struct dtype_t; +/* SQL Like operator comparison types */ +enum ib_like_enum { + IB_LIKE_EXACT, /* e.g. 
STRING */ + IB_LIKE_PREFIX, /* e.g., STRING% */ + IB_LIKE_SUFFIX, /* e.g., %STRING */ + IB_LIKE_SUBSTR, /* e.g., %STRING% */ + IB_LIKE_REGEXP /* Future */ +}; +typedef enum ib_like_enum ib_like_t; + /*-------------------------------------------*/ /* The 'MAIN TYPE' of a column */ #define DATA_VARCHAR 1 /* character varying of the @@ -139,6 +149,8 @@ be less than 256 */ #define DATA_N_SYS_COLS 3 /* number of system columns defined above */ +#define DATA_FTS_DOC_ID 3 /* Used as FTS DOC ID column */ + #define DATA_SYS_PRTYPE_MASK 0xF /* mask to extract the above from prtype */ /* Flags ORed to the precise data type */ @@ -182,6 +194,12 @@ because in GCC it returns a long. */ /* Get mbmaxlen from mbminmaxlen. */ #define DATA_MBMAXLEN(mbminmaxlen) ((ulint) ((mbminmaxlen) / DATA_MBMAX)) +/* We now support 15 bits (up to 32767) collation number */ +#define MAX_CHAR_COLL_NUM 32767 + +/* Mask to get the Charset Collation number (0x7fff) */ +#define CHAR_COLL_MASK MAX_CHAR_COLL_NUM + #ifndef UNIV_HOTBACKUP /*********************************************************************//** Gets the MySQL type code from a dtype. @@ -450,6 +468,20 @@ dtype_new_read_for_order_and_null_size( /*===================================*/ dtype_t* type, /*!< in: type struct */ const byte* buf); /*!< in: buffer for stored type order info */ + +/*********************************************************************//** +Returns the type's SQL name (e.g. 
BIGINT UNSIGNED) from mtype,prtype,len +@return the SQL type name */ +UNIV_INLINE +char* +dtype_sql_name( +/*===========*/ + unsigned mtype, /*!< in: mtype */ + unsigned prtype, /*!< in: prtype */ + unsigned len, /*!< in: len */ + char* name, /*!< out: SQL name */ + unsigned name_sz);/*!< in: size of the name buffer */ + #endif /* !UNIV_HOTBACKUP */ /*********************************************************************//** @@ -477,14 +509,14 @@ dtype_new_read_for_order_and_null_size() sym_tab_add_null_lit() */ struct dtype_struct{ - unsigned mtype:8; /*!< main data type */ - unsigned prtype:24; /*!< precise type; MySQL data + unsigned prtype:32; /*!< precise type; MySQL data type, charset code, flags to indicate nullability, signedness, whether this is a binary string, whether this is a true VARCHAR where MySQL uses 2 bytes to store the length */ + unsigned mtype:8; /*!< main data type */ /* the remaining fields do not affect alphabetical ordering: */ diff --git a/storage/innobase/include/data0type.ic b/storage/innobase/include/data0type.ic index 757dd815c5e..a5e94a8edff 100644 --- a/storage/innobase/include/data0type.ic +++ b/storage/innobase/include/data0type.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -23,6 +23,8 @@ Data types Created 1/16/1996 Heikki Tuuri *******************************************************/ +#include <string.h> /* strlen() */ + #include "mach0data.h" #ifndef UNIV_HOTBACKUP # include "ha_prototypes.h" @@ -36,7 +38,7 @@ dtype_get_charset_coll( /*===================*/ ulint prtype) /*!< in: precise data type */ { - return((prtype >> 16) & 0xFFUL); + return((prtype >> 16) & CHAR_COLL_MASK); } /*********************************************************************//** @@ -259,8 +261,8 @@ dtype_get_pad_char( switch (mtype) { case DATA_FIXBINARY: case DATA_BINARY: - if (UNIV_UNLIKELY(dtype_get_charset_coll(prtype) - == DATA_MYSQL_BINARY_CHARSET_COLL)) { + if (dtype_get_charset_coll(prtype) + == DATA_MYSQL_BINARY_CHARSET_COLL) { /* Starting from 5.0.18, do not pad VARBINARY or BINARY columns. 
*/ return(ULINT_UNDEFINED); @@ -312,11 +314,11 @@ dtype_new_store_for_order_and_null_size( buf[0] = (byte)(type->mtype & 0xFFUL); if (type->prtype & DATA_BINARY_TYPE) { - buf[0] = buf[0] | 128; + buf[0] |= 128; } /* In versions < 4.1.2 we had: if (type->prtype & DATA_NONLATIN1) { - buf[0] = buf[0] | 64; + buf[0] |= 64; } */ @@ -326,7 +328,7 @@ dtype_new_store_for_order_and_null_size( mach_write_to_2(buf + 2, len & 0xFFFFUL); - ut_ad(dtype_get_charset_coll(type->prtype) < 256); + ut_ad(dtype_get_charset_coll(type->prtype) <= MAX_CHAR_COLL_NUM); mach_write_to_2(buf + 4, dtype_get_charset_coll(type->prtype)); if (type->prtype & DATA_NOT_NULL) { @@ -353,7 +355,7 @@ dtype_read_for_order_and_null_size( type->prtype = buf[1]; if (buf[0] & 128) { - type->prtype = type->prtype | DATA_BINARY_TYPE; + type->prtype |= DATA_BINARY_TYPE; } type->len = mach_read_from_2(buf + 2); @@ -393,10 +395,10 @@ dtype_new_read_for_order_and_null_size( type->len = mach_read_from_2(buf + 2); - charset_coll = mach_read_from_2(buf + 4) & 0x7fff; + charset_coll = mach_read_from_2(buf + 4) & CHAR_COLL_MASK; if (dtype_is_string_type(type->mtype)) { - ut_a(charset_coll < 256); + ut_a(charset_coll <= MAX_CHAR_COLL_NUM); if (charset_coll == 0) { /* This insert buffer record was inserted with MySQL @@ -412,6 +414,101 @@ dtype_new_read_for_order_and_null_size( } dtype_set_mblen(type); } + +/*********************************************************************//** +Returns the type's SQL name (e.g. 
BIGINT UNSIGNED) from mtype,prtype,len +@return the SQL type name */ +UNIV_INLINE +char* +dtype_sql_name( +/*===========*/ + unsigned mtype, /*!< in: mtype */ + unsigned prtype, /*!< in: prtype */ + unsigned len, /*!< in: len */ + char* name, /*!< out: SQL name */ + unsigned name_sz)/*!< in: size of the name buffer */ +{ + +#define APPEND_UNSIGNED() \ + do { \ + if (prtype & DATA_UNSIGNED) { \ + ut_snprintf(name + strlen(name), \ + name_sz - strlen(name), \ + " UNSIGNED"); \ + } \ + } while (0) + + ut_snprintf(name, name_sz, "UNKNOWN"); + + switch (mtype) { + case DATA_INT: + switch (len) { + case 1: + ut_snprintf(name, name_sz, "TINYINT"); + break; + case 2: + ut_snprintf(name, name_sz, "SMALLINT"); + break; + case 3: + ut_snprintf(name, name_sz, "MEDIUMINT"); + break; + case 4: + ut_snprintf(name, name_sz, "INT"); + break; + case 8: + ut_snprintf(name, name_sz, "BIGINT"); + break; + } + APPEND_UNSIGNED(); + break; + case DATA_FLOAT: + ut_snprintf(name, name_sz, "FLOAT"); + APPEND_UNSIGNED(); + break; + case DATA_DOUBLE: + ut_snprintf(name, name_sz, "DOUBLE"); + APPEND_UNSIGNED(); + break; + case DATA_FIXBINARY: + ut_snprintf(name, name_sz, "BINARY(%u)", len); + break; + case DATA_CHAR: + case DATA_MYSQL: + ut_snprintf(name, name_sz, "CHAR(%u)", len); + break; + case DATA_VARCHAR: + case DATA_VARMYSQL: + ut_snprintf(name, name_sz, "VARCHAR(%u)", len); + break; + case DATA_BINARY: + ut_snprintf(name, name_sz, "VARBINARY(%u)", len); + break; + case DATA_BLOB: + switch (len) { + case 9: + ut_snprintf(name, name_sz, "TINYBLOB"); + break; + case 10: + ut_snprintf(name, name_sz, "BLOB"); + break; + case 11: + ut_snprintf(name, name_sz, "MEDIUMBLOB"); + break; + case 12: + ut_snprintf(name, name_sz, "LONGBLOB"); + break; + } + } + + if (prtype & DATA_NOT_NULL) { + ut_snprintf(name + strlen(name), + name_sz - strlen(name), + " NOT NULL"); + } + + return(name); +} + #endif /* !UNIV_HOTBACKUP */ /***********************************************************************//** @@ 
-473,9 +570,8 @@ dtype_get_fixed_size_low( dtype_get_charset_coll(prtype), &i_mbminlen, &i_mbmaxlen); - if (UNIV_UNLIKELY - (DATA_MBMINMAXLEN(i_mbminlen, i_mbmaxlen) - != mbminmaxlen)) { + if (DATA_MBMINMAXLEN(i_mbminlen, i_mbmaxlen) + != mbminmaxlen) { ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: " diff --git a/storage/innobase/include/data0types.h b/storage/innobase/include/data0types.h index 04e835bc401..7d599ef2c8d 100644 --- a/storage/innobase/include/data0types.h +++ b/storage/innobase/include/data0types.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2000, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2000, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/db0err.h b/storage/innobase/include/db0err.h index 95ccef16be0..1a3499b09e0 100644 --- a/storage/innobase/include/db0err.h +++ b/storage/innobase/include/db0err.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -42,78 +42,79 @@ enum db_err { DB_ROLLBACK, DB_DUPLICATE_KEY, DB_QUE_THR_SUSPENDED, - DB_MISSING_HISTORY, /* required history data has been + DB_MISSING_HISTORY, /*!< required history data has been deleted due to lack of space in rollback segment */ DB_CLUSTER_NOT_FOUND = 30, DB_TABLE_NOT_FOUND, - DB_MUST_GET_MORE_FILE_SPACE, /* the database has to be stopped + DB_MUST_GET_MORE_FILE_SPACE, /*!< the database has to be stopped and restarted with more file space */ DB_TABLE_IS_BEING_USED, - DB_TOO_BIG_RECORD, /* a record in an index would not fit + DB_TOO_BIG_RECORD, /*!< a record in an index would not fit on a compressed page, or it would become bigger than 1/2 free space in an uncompressed page frame */ - DB_LOCK_WAIT_TIMEOUT, /* lock wait lasted too long */ - DB_NO_REFERENCED_ROW, /* referenced key value not found + DB_LOCK_WAIT_TIMEOUT, /*!< lock wait lasted too long */ + DB_NO_REFERENCED_ROW, /*!< referenced key value not found for a foreign key in an insert or update of a row */ - DB_ROW_IS_REFERENCED, /* cannot delete or update a row + DB_ROW_IS_REFERENCED, /*!< cannot delete or update a row because it contains a key value which is referenced */ - DB_CANNOT_ADD_CONSTRAINT, /* adding a foreign key constraint + DB_CANNOT_ADD_CONSTRAINT, /*!< adding a foreign key constraint to a table failed */ - DB_CORRUPTION, /* data structure corruption noticed */ - DB_CANNOT_DROP_CONSTRAINT, /* dropping a foreign key constraint + DB_CORRUPTION, /*!< data structure corruption noticed */ + DB_CANNOT_DROP_CONSTRAINT, /*!< dropping a foreign key constraint from a table failed */ - DB_NO_SAVEPOINT, /* no savepoint exists with the given + DB_NO_SAVEPOINT, /*!< no savepoint exists with the given name */ - DB_TABLESPACE_ALREADY_EXISTS, /* we cannot create a new single-table + DB_TABLESPACE_ALREADY_EXISTS, /*!< we cannot create a 
new single-table tablespace because a file of the same name already exists */ - DB_TABLESPACE_DELETED, /* tablespace does not exist or is + DB_TABLESPACE_DELETED, /*!< tablespace does not exist or is being dropped right now */ - DB_LOCK_TABLE_FULL, /* lock structs have exhausted the + DB_LOCK_TABLE_FULL, /*!< lock structs have exhausted the buffer pool (for big transactions, InnoDB stores the lock structs in the buffer pool) */ - DB_FOREIGN_DUPLICATE_KEY, /* foreign key constraints + DB_FOREIGN_DUPLICATE_KEY, /*!< foreign key constraints activated by the operation would lead to a duplicate key in some table */ - DB_TOO_MANY_CONCURRENT_TRXS, /* when InnoDB runs out of the + DB_TOO_MANY_CONCURRENT_TRXS, /*!< when InnoDB runs out of the preconfigured undo slots, this can only happen when there are too many concurrent transactions */ - DB_UNSUPPORTED, /* when InnoDB sees any artefact or + DB_UNSUPPORTED, /*!< when InnoDB sees any artefact or a feature that it can't recognize or work with e.g., FT indexes created by a later version of the engine. 
*/ - DB_PRIMARY_KEY_IS_NULL, /* a column in the PRIMARY KEY + DB_PRIMARY_KEY_IS_NULL, /*!< a column in the PRIMARY KEY was found to be NULL */ - DB_STATS_DO_NOT_EXIST, /* an operation that requires the + DB_STATS_DO_NOT_EXIST, /*!< an operation that requires the persistent storage, used for recording table and index statistics, was requested but this storage does not exist itself or the stats for a given table do not exist */ - DB_FOREIGN_EXCEED_MAX_CASCADE, /* Foreign key constraint related + DB_FOREIGN_EXCEED_MAX_CASCADE, /*!< Foreign key constraint related cascading delete/update exceeds maximum allowed depth */ - DB_CHILD_NO_INDEX, /* the child (foreign) table does not - have an index that contains the + DB_CHILD_NO_INDEX, /*!< the child (foreign) table does + not have an index that contains the foreign keys as its prefix columns */ - DB_PARENT_NO_INDEX, /* the parent table does not + DB_PARENT_NO_INDEX, /*!< the parent table does not have an index that contains the foreign keys as its prefix columns */ - DB_TOO_BIG_INDEX_COL, /* index column size exceeds maximum - limit */ - DB_INDEX_CORRUPT, /* we have corrupted index */ - DB_UNDO_RECORD_TOO_BIG, /* the undo log record is too big */ - DB_TABLE_IN_FK_CHECK, /* table is being used in foreign - key check */ + DB_TOO_BIG_INDEX_COL, /*!< index column size exceeds + maximum limit */ + DB_INDEX_CORRUPT, /*!< we have corrupted index */ + DB_UNDO_RECORD_TOO_BIG, /*!< the undo log record is too big */ + DB_READ_ONLY, /*!< Update operation attempted in + a read-only transaction */ + DB_FTS_INVALID_DOCID, /* FTS Doc ID cannot be zero */ /* The following are partial failure codes */ DB_FAIL = 1000, diff --git a/storage/innobase/include/dict0boot.h b/storage/innobase/include/dict0boot.h index 5d136862bc6..364aa746638 100644 --- a/storage/innobase/include/dict0boot.h +++ b/storage/innobase/include/dict0boot.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 
2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -113,7 +113,6 @@ dict_create(void); indexes; ibuf tables and indexes are assigned as the id the number DICT_IBUF_ID_MIN plus the space id */ -#define DICT_IBUF_ID_MIN 0xFFFFFFFF00000000ULL /* The offset of the dictionary header on the page */ #define DICT_HDR FSEG_PAGE_DATA @@ -121,30 +120,159 @@ dict_create(void); /*-------------------------------------------------------------*/ /* Dictionary header offsets */ #define DICT_HDR_ROW_ID 0 /* The latest assigned row id */ -#define DICT_HDR_TABLE_ID 8 /* The latest assigned table id */ -#define DICT_HDR_INDEX_ID 16 /* The latest assigned index id */ -#define DICT_HDR_MAX_SPACE_ID 24 /* The latest assigned space id, or 0*/ -#define DICT_HDR_MIX_ID_LOW 28 /* Obsolete,always DICT_HDR_FIRST_ID */ -#define DICT_HDR_TABLES 32 /* Root of the table index tree */ -#define DICT_HDR_TABLE_IDS 36 /* Root of the table index tree */ -#define DICT_HDR_COLUMNS 40 /* Root of the column index tree */ -#define DICT_HDR_INDEXES 44 /* Root of the index index tree */ -#define DICT_HDR_FIELDS 48 /* Root of the index field - index tree */ +#define DICT_HDR_TABLE_ID 8 /* The latest assigned table id */ 
+#define DICT_HDR_INDEX_ID 16 /* The latest assigned index id */ +#define DICT_HDR_MAX_SPACE_ID 24 /* The latest assigned space id,or 0*/ +#define DICT_HDR_MIX_ID_LOW 28 /* Obsolete,always DICT_HDR_FIRST_ID*/ +#define DICT_HDR_TABLES 32 /* Root of SYS_TABLES clust index */ +#define DICT_HDR_TABLE_IDS 36 /* Root of SYS_TABLE_IDS sec index */ +#define DICT_HDR_COLUMNS 40 /* Root of SYS_COLUMNS clust index */ +#define DICT_HDR_INDEXES 44 /* Root of SYS_INDEXES clust index */ +#define DICT_HDR_FIELDS 48 /* Root of SYS_FIELDS clust index */ #define DICT_HDR_FSEG_HEADER 56 /* Segment header for the tablespace segment into which the dictionary header is created */ /*-------------------------------------------------------------*/ +/* The columns in SYS_TABLES */ +enum dict_col_sys_tables_enum { + DICT_COL__SYS_TABLES__NAME = 0, + DICT_COL__SYS_TABLES__ID = 1, + DICT_COL__SYS_TABLES__N_COLS = 2, + DICT_COL__SYS_TABLES__TYPE = 3, + DICT_COL__SYS_TABLES__MIX_ID = 4, + DICT_COL__SYS_TABLES__MIX_LEN = 5, + DICT_COL__SYS_TABLES__CLUSTER_ID = 6, + DICT_COL__SYS_TABLES__SPACE = 7, + DICT_NUM_COLS__SYS_TABLES = 8 +}; /* The field numbers in the SYS_TABLES clustered index */ -#define DICT_SYS_TABLES_TYPE_FIELD 5 - +enum dict_fld_sys_tables_enum { + DICT_FLD__SYS_TABLES__NAME = 0, + DICT_FLD__SYS_TABLES__DB_TRX_ID = 1, + DICT_FLD__SYS_TABLES__DB_ROLL_PTR = 2, + DICT_FLD__SYS_TABLES__ID = 3, + DICT_FLD__SYS_TABLES__N_COLS = 4, + DICT_FLD__SYS_TABLES__TYPE = 5, + DICT_FLD__SYS_TABLES__MIX_ID = 6, + DICT_FLD__SYS_TABLES__MIX_LEN = 7, + DICT_FLD__SYS_TABLES__CLUSTER_ID = 8, + DICT_FLD__SYS_TABLES__SPACE = 9, + DICT_NUM_FIELDS__SYS_TABLES = 10 +}; +/* The field numbers in the SYS_TABLE_IDS index */ +enum dict_fld_sys_table_ids_enum { + DICT_FLD__SYS_TABLE_IDS__ID = 0, + DICT_FLD__SYS_TABLE_IDS__NAME = 1, + DICT_NUM_FIELDS__SYS_TABLE_IDS = 2 +}; +/* The columns in SYS_COLUMNS */ +enum dict_col_sys_columns_enum { + DICT_COL__SYS_COLUMNS__TABLE_ID = 0, + DICT_COL__SYS_COLUMNS__POS = 1, + 
DICT_COL__SYS_COLUMNS__NAME = 2, + DICT_COL__SYS_COLUMNS__MTYPE = 3, + DICT_COL__SYS_COLUMNS__PRTYPE = 4, + DICT_COL__SYS_COLUMNS__LEN = 5, + DICT_COL__SYS_COLUMNS__PREC = 6, + DICT_NUM_COLS__SYS_COLUMNS = 7 +}; +/* The field numbers in the SYS_COLUMNS clustered index */ +enum dict_fld_sys_columns_enum { + DICT_FLD__SYS_COLUMNS__TABLE_ID = 0, + DICT_FLD__SYS_COLUMNS__POS = 1, + DICT_FLD__SYS_COLUMNS__DB_TRX_ID = 2, + DICT_FLD__SYS_COLUMNS__DB_ROLL_PTR = 3, + DICT_FLD__SYS_COLUMNS__NAME = 4, + DICT_FLD__SYS_COLUMNS__MTYPE = 5, + DICT_FLD__SYS_COLUMNS__PRTYPE = 6, + DICT_FLD__SYS_COLUMNS__LEN = 7, + DICT_FLD__SYS_COLUMNS__PREC = 8, + DICT_NUM_FIELDS__SYS_COLUMNS = 9 +}; +/* The columns in SYS_INDEXES */ +enum dict_col_sys_indexes_enum { + DICT_COL__SYS_INDEXES__TABLE_ID = 0, + DICT_COL__SYS_INDEXES__ID = 1, + DICT_COL__SYS_INDEXES__NAME = 2, + DICT_COL__SYS_INDEXES__N_FIELDS = 3, + DICT_COL__SYS_INDEXES__TYPE = 4, + DICT_COL__SYS_INDEXES__SPACE = 5, + DICT_COL__SYS_INDEXES__PAGE_NO = 6, + DICT_NUM_COLS__SYS_INDEXES = 7 +}; /* The field numbers in the SYS_INDEXES clustered index */ -#define DICT_SYS_INDEXES_PAGE_NO_FIELD 8 -#define DICT_SYS_INDEXES_SPACE_NO_FIELD 7 -#define DICT_SYS_INDEXES_TYPE_FIELD 6 -#define DICT_SYS_INDEXES_NAME_FIELD 4 +enum dict_fld_sys_indexes_enum { + DICT_FLD__SYS_INDEXES__TABLE_ID = 0, + DICT_FLD__SYS_INDEXES__ID = 1, + DICT_FLD__SYS_INDEXES__DB_TRX_ID = 2, + DICT_FLD__SYS_INDEXES__DB_ROLL_PTR = 3, + DICT_FLD__SYS_INDEXES__NAME = 4, + DICT_FLD__SYS_INDEXES__N_FIELDS = 5, + DICT_FLD__SYS_INDEXES__TYPE = 6, + DICT_FLD__SYS_INDEXES__SPACE = 7, + DICT_FLD__SYS_INDEXES__PAGE_NO = 8, + DICT_NUM_FIELDS__SYS_INDEXES = 9 +}; +/* The columns in SYS_FIELDS */ +enum dict_col_sys_fields_enum { + DICT_COL__SYS_FIELDS__INDEX_ID = 0, + DICT_COL__SYS_FIELDS__POS = 1, + DICT_COL__SYS_FIELDS__COL_NAME = 2, + DICT_NUM_COLS__SYS_FIELDS = 3 +}; +/* The field numbers in the SYS_FIELDS clustered index */ +enum dict_fld_sys_fields_enum { + 
DICT_FLD__SYS_FIELDS__INDEX_ID = 0, + DICT_FLD__SYS_FIELDS__POS = 1, + DICT_FLD__SYS_FIELDS__DB_TRX_ID = 2, + DICT_FLD__SYS_FIELDS__DB_ROLL_PTR = 3, + DICT_FLD__SYS_FIELDS__COL_NAME = 4, + DICT_NUM_FIELDS__SYS_FIELDS = 5 +}; +/* The columns in SYS_FOREIGN */ +enum dict_col_sys_foreign_enum { + DICT_COL__SYS_FOREIGN__ID = 0, + DICT_COL__SYS_FOREIGN__FOR_NAME = 1, + DICT_COL__SYS_FOREIGN__REF_NAME = 2, + DICT_COL__SYS_FOREIGN__N_COLS = 3, + DICT_NUM_COLS__SYS_FOREIGN = 4 +}; +/* The field numbers in the SYS_FOREIGN clustered index */ +enum dict_fld_sys_foreign_enum { + DICT_FLD__SYS_FOREIGN__ID = 0, + DICT_FLD__SYS_FOREIGN__DB_TRX_ID = 1, + DICT_FLD__SYS_FOREIGN__DB_ROLL_PTR = 2, + DICT_FLD__SYS_FOREIGN__FOR_NAME = 3, + DICT_FLD__SYS_FOREIGN__REF_NAME = 4, + DICT_FLD__SYS_FOREIGN__N_COLS = 5, + DICT_NUM_FIELDS__SYS_FOREIGN = 6 +}; +/* The field numbers in the SYS_FOREIGN_FOR_NAME secondary index */ +enum dict_fld_sys_foreign_for_name_enum { + DICT_FLD__SYS_FOREIGN_FOR_NAME__NAME = 0, + DICT_FLD__SYS_FOREIGN_FOR_NAME__ID = 1, + DICT_NUM_FIELDS__SYS_FOREIGN_FOR_NAME = 2 +}; +/* The columns in SYS_FOREIGN_COLS */ +enum dict_col_sys_foreign_cols_enum { + DICT_COL__SYS_FOREIGN_COLS__ID = 0, + DICT_COL__SYS_FOREIGN_COLS__POS = 1, + DICT_COL__SYS_FOREIGN_COLS__FOR_COL_NAME = 2, + DICT_COL__SYS_FOREIGN_COLS__REF_COL_NAME = 3, + DICT_NUM_COLS__SYS_FOREIGN_COLS = 4 +}; +/* The field numbers in the SYS_FOREIGN_COLS clustered index */ +enum dict_fld_sys_foreign_cols_enum { + DICT_FLD__SYS_FOREIGN_COLS__ID = 0, + DICT_FLD__SYS_FOREIGN_COLS__POS = 1, + DICT_FLD__SYS_FOREIGN_COLS__DB_TRX_ID = 2, + DICT_FLD__SYS_FOREIGN_COLS__DB_ROLL_PTR = 3, + DICT_FLD__SYS_FOREIGN_COLS__FOR_COL_NAME = 4, + DICT_FLD__SYS_FOREIGN_COLS__REF_COL_NAME = 5, + DICT_NUM_FIELDS__SYS_FOREIGN_COLS = 6 +}; /* When a row id which is zero modulo this number (which must be a power of two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is diff --git a/storage/innobase/include/dict0boot.ic 
b/storage/innobase/include/dict0boot.ic index d3ba9eee78f..0f660ab7555 100644 --- a/storage/innobase/include/dict0boot.ic +++ b/storage/innobase/include/dict0boot.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/dict0crea.h b/storage/innobase/include/dict0crea.h index cce1246b789..68fc9ba195a 100644 --- a/storage/innobase/include/dict0crea.h +++ b/storage/innobase/include/dict0crea.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/dict0crea.ic b/storage/innobase/include/dict0crea.ic index c5365ce7489..98cbbf28208 100644 --- a/storage/innobase/include/dict0crea.ic +++ b/storage/innobase/include/dict0crea.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h index 89d6fc66635..073b68c26ad 100644 --- a/storage/innobase/include/dict0dict.h +++ b/storage/innobase/include/dict0dict.h @@ -27,6 +27,7 @@ Created 1/8/1996 Heikki Tuuri #define dict0dict_h #include "univ.i" +#include "db0err.h" #include "dict0types.h" #include "dict0mem.h" #include "data0type.h" @@ -73,16 +74,16 @@ Returns a table object based on table id. @return table, NULL if does not exist */ UNIV_INTERN dict_table_t* -dict_table_get_on_id( -/*=================*/ - table_id_t table_id, /*!< in: table id */ - trx_t* trx); /*!< in: transaction handle */ +dict_table_open_on_id( +/*==================*/ + table_id_t table_id, /*!< in: table id */ + ibool dict_locked); /*!< in: TRUE=data dictionary locked */ /********************************************************************//** -Decrements the count of open MySQL handles to a table. */ +Decrements the count of open handles to a table. */ UNIV_INTERN void -dict_table_decrement_handle_count( -/*==============================*/ +dict_table_close( +/*=============*/ dict_table_t* table, /*!< in/out: table */ ibool dict_locked); /*!< in: TRUE=data dictionary locked */ /**********************************************************************//** @@ -138,7 +139,7 @@ dict_col_copy_type( dtype_t* type); /*!< out: data type */ /**********************************************************************//** Determine bytes of column prefix to be stored in the undo log. 
Please -note if the table format is UNIV_FORMAT_A (< DICT_TF_FORMAT_ZIP), no prefix +note if the table format is UNIV_FORMAT_A (< UNIV_FORMAT_B), no prefix needs to be stored in the undo log. @return bytes of column prefix to be stored in the undo log */ UNIV_INLINE @@ -280,8 +281,9 @@ UNIV_INTERN void dict_table_add_to_cache( /*====================*/ - dict_table_t* table, /*!< in: table */ - mem_heap_t* heap); /*!< in: temporary heap */ + dict_table_t* table, /*!< in: table */ + ibool can_be_evicted, /*!< in: TRUE if can be evicted*/ + mem_heap_t* heap); /*!< in: temporary heap */ /**********************************************************************//** Removes a table object from the dictionary cache. */ UNIV_INTERN @@ -360,6 +362,16 @@ dict_table_replace_index_in_foreign_list( dict_table_t* table, /*!< in/out: table */ dict_index_t* index, /*!< in: index to be replaced */ const trx_t* trx); /*!< in: transaction handle */ +/**********************************************************************//** +Determines whether a string starts with the specified keyword. +@return TRUE if str starts with keyword */ +UNIV_INTERN +ibool +dict_str_starts_with_keyword( +/*=========================*/ + void* mysql_thd, /*!< in: MySQL thread handle */ + const char* str, /*!< in: string to scan for keyword */ + const char* keyword); /*!< in: keyword to look for */ /*********************************************************************//** Checks if a index is defined for a foreign key constraint. Index is a part of a foreign key constraint if the index is referenced by foreign key @@ -417,66 +429,32 @@ dict_foreign_parse_drop_constraints( const char*** constraints_to_drop); /*!< out: id's of the constraints to drop */ /**********************************************************************//** -Returns a table object and optionally increment its MySQL open handle count. +Returns a table object and increments its open handle count. NOTE! 
This is a high-level function to be used mainly from outside the -'dict' directory. Inside this directory dict_table_get_low is usually the -appropriate function. +'dict' directory. Inside this directory dict_table_get_low +is usually the appropriate function. @return table, NULL if does not exist */ UNIV_INTERN dict_table_t* -dict_table_get( -/*===========*/ +dict_table_open_on_name( +/*====================*/ const char* table_name, /*!< in: table name */ - ibool inc_mysql_count); - /*!< in: whether to increment the open - handle count on the table */ + ibool dict_locked); /*!< in: TRUE=data dictionary locked */ + /**********************************************************************//** -Returns a index object, based on table and index id, and memoryfixes it. -@return index, NULL if does not exist */ +Returns a table object and increment its open handle count. Table +statistics will not be updated if they are not initialized. +Call this function when dropping a table. +@return table, NULL if does not exist */ UNIV_INTERN -dict_index_t* -dict_index_get_on_id_low( -/*=====================*/ - dict_table_t* table, /*!< in: table */ - index_id_t index_id); /*!< in: index id */ -/**********************************************************************//** -Checks if a table is in the dictionary cache. -@return table, NULL if not found */ - -UNIV_INLINE dict_table_t* -dict_table_check_if_in_cache_low( +dict_table_open_on_name_no_stats( /*=============================*/ - const char* table_name); /*!< in: table name */ -/**********************************************************************//** -Gets a table; loads it to the dictionary cache if necessary. A low-level -function. 
-@return table, NULL if not found */ -UNIV_INLINE -dict_table_t* -dict_table_get_low_ignore_err( -/*===========================*/ const char* table_name, /*!< in: table name */ + ibool dict_locked, /*!< in: TRUE=data dictionary locked */ dict_err_ignore_t ignore_err); /*!< in: error to be ignored when - loading a table definition */ -/**********************************************************************//** -Gets a table; loads it to the dictionary cache if necessary. A low-level -function. -@return table, NULL if not found */ -UNIV_INLINE -dict_table_t* -dict_table_get_low( -/*===============*/ - const char* table_name); /*!< in: table name */ -/**********************************************************************//** -Returns a table object based on table id. -@return table, NULL if does not exist */ -UNIV_INLINE -dict_table_t* -dict_table_get_on_id_low( -/*=====================*/ - table_id_t table_id); /*!< in: table id */ + loading the table */ /**********************************************************************//** Find an index that is equivalent to the one passed in and is not marked for deletion. @@ -627,6 +605,15 @@ dict_index_is_ibuf( const dict_index_t* index) /*!< in: index */ __attribute__((nonnull, pure, warn_unused_result)); /********************************************************************//** +Check whether the index is an universal index tree. +@return nonzero for universal tree, zero for other indexes */ +UNIV_INLINE +ulint +dict_index_is_univ( +/*===============*/ + const dict_index_t* index) /*!< in: index */ + __attribute__((nonnull, pure, warn_unused_result)); +/********************************************************************//** Check whether the index is a secondary index or the insert buffer tree. 
@return nonzero for insert buffer, zero for other indexes */ UNIV_INLINE @@ -636,6 +623,16 @@ dict_index_is_sec_or_ibuf( const dict_index_t* index) /*!< in: index */ __attribute__((nonnull, pure, warn_unused_result)); +/************************************************************************ +Gets the all the FTS indexes for the table. NOTE: must not be called for +tables which do not have an FTS-index. */ + +ulint +dict_table_get_all_fts_indexes( +/*===========================*/ + /* out: number of indexes collected */ + dict_table_t* table, /* in: table */ + ib_vector_t* indexes);/* out: vector for collecting FTS indexes */ /********************************************************************//** Gets the number of user-defined columns in a table in the dictionary cache. @@ -726,21 +723,54 @@ dict_table_get_format( /*==================*/ const dict_table_t* table); /*!< in: table */ /********************************************************************//** -Set the file format of a table. */ +Determine the file format from a dict_table_t::flags. +@return file format version */ +UNIV_INLINE +ulint +dict_tf_get_format( +/*===============*/ + ulint flags); /*!< in: dict_table_t::flags */ +/********************************************************************//** +Set the various values in a dict_table_t::flags pointer. */ UNIV_INLINE void -dict_table_set_format( -/*==================*/ - dict_table_t* table, /*!< in/out: table */ - ulint format);/*!< in: file format version */ +dict_tf_set( +/*========*/ + ulint* flags, /*!< in/out: table */ + rec_format_t format, /*!< in: file format */ + ulint zip_ssize); /*!< in: zip shift size */ +/********************************************************************//** +Convert a 32 bit integer table flags to the 32 bit integer that is +written into the tablespace header at the offset FSP_SPACE_FLAGS and is +also stored in the fil_space_t::flags field. The following chart shows +the translation of the low order bit. 
Other bits are the same. +========================= Low order bit ========================== + | REDUNDANT | COMPACT | COMPRESSED | DYNAMIC +dict_table_t::flags | 0 | 1 | 1 | 1 +fil_space_t::flags | 0 | 0 | 1 | 1 +================================================================== +@return tablespace flags (fil_space_t::flags) */ +UNIV_INLINE +ulint +dict_tf_to_fsp_flags( +/*=================*/ + ulint flags) /*!< in: dict_table_t::flags */ + __attribute__((const)); +/********************************************************************/ +UNIV_INLINE +ulint +dict_tf_to_sys_tables_type( +/*=======================*/ + ulint flags) /*!< in: dict_table_t::flags */ + __attribute__((const)); /********************************************************************//** Extract the compressed page size from table flags. @return compressed page size, or 0 if not compressed */ UNIV_INLINE ulint -dict_table_flags_to_zip_size( -/*=========================*/ - ulint flags) /*!< in: flags */ +dict_tf_get_zip_size( +/*=================*/ + ulint flags) /*!< in: flags */ __attribute__((const)); /********************************************************************//** Check whether the table uses the compressed compact page format. @@ -750,6 +780,7 @@ ulint dict_table_zip_size( /*================*/ const dict_table_t* table); /*!< in: table */ +#ifndef UNIV_HOTBACKUP /*********************************************************************//** Obtain exclusive locks on all index trees of the table. This is to prevent accessing index trees while InnoDB is updating internal metadata for @@ -776,7 +807,43 @@ dict_table_col_in_clustered_key( /*============================*/ const dict_table_t* table, /*!< in: table */ ulint n); /*!< in: column number */ -#ifndef UNIV_HOTBACKUP +/*******************************************************************//** +Check if the table has an FTS index. 
+@return TRUE if table has an FTS index */ +UNIV_INLINE +ibool +dict_table_has_fts_index( +/*=====================*/ + dict_table_t* table); /*!< in: table */ +/*******************************************************************//** +Validate and return the table flags. +@return Same as input after validating it as dict_table_t::flags. +If there is an error, trigger assertion failure. */ +UNIV_INLINE +ulint +dict_tf_validate( +/*=============*/ + ulint flags); /*!< in: table flags */ +/********************************************************************//** +Validate a SYS_TABLES TYPE field and return it. +@return Same as input after validating it as a SYS_TABLES TYPE field. +If there is an error, return ULINT_UNDEFINED. */ +UNIV_INLINE +ulint +dict_sys_tables_type_validate( +/*==========================*/ + ulint type, /*!< in: SYS_TABLES.TYPE */ + ulint n_cols); /*!< in: SYS_TABLES.N_COLS */ +/********************************************************************//** +Determine the file format from dict_table_t::flags +The low order bit will be zero for REDUNDANT and 1 for COMPACT. For any +other row_format, file_format is > 0 and DICT_TF_COMPACT will also be set. +@return file format version */ +UNIV_INLINE +rec_format_t +dict_tf_get_rec_format( +/*===================*/ + ulint flags); /*!< in: dict_table_t::flags */ /*******************************************************************//** Copies types of columns contained in table to tuple and sets all fields of the tuple to the SQL NULL value. This function should @@ -787,6 +854,17 @@ dict_table_copy_types( /*==================*/ dtuple_t* tuple, /*!< in/out: data tuple */ const dict_table_t* table); /*!< in: table */ +/******************************************************************** +Wait until all the background threads of the given table have exited, i.e., +bg_threads == 0. Note: bg_threads_mutex must be reserved when +calling this. 
*/ + +void +dict_table_wait_for_bg_threads_to_exit( +/*===================================*/ + dict_table_t* table, /* in: table */ + ulint delay); /* in: time in microseconds to wait between + checks of bg_threads. */ /**********************************************************************//** Looks for an index with the given id. NOTE that we do not reserve the dictionary mutex: this function is for emergency purposes like @@ -798,6 +876,17 @@ dict_index_find_on_id_low( /*======================*/ index_id_t id); /*!< in: index id */ /**********************************************************************//** +Make room in the table cache by evicting an unused table. The unused table +should not be part of FK relationship and currently not used in any user +transaction. There is no guarantee that it will remove a table. +@return number of tables evicted. */ +UNIV_INTERN +ulint +dict_make_room_in_cache( +/*====================*/ + ulint max_tables, /*!< in: max tables allowed in cache */ + ulint pct_check); /*!< in: max percent to check */ +/**********************************************************************//** Adds an index to the dictionary cache. @return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */ UNIV_INTERN @@ -901,13 +990,25 @@ dict_index_get_nth_col_no( Looks for column n in an index. @return position in internal representation of the index; ULINT_UNDEFINED if not contained */ -UNIV_INTERN +UNIV_INLINE ulint dict_index_get_nth_col_pos( /*=======================*/ const dict_index_t* index, /*!< in: index */ ulint n); /*!< in: column number */ /********************************************************************//** +Looks for column n in an index. 
+@return position in internal representation of the index; +ULINT_UNDEFINED if not contained */ +UNIV_INTERN +ulint +dict_index_get_nth_col_or_prefix_pos( +/*=================================*/ + const dict_index_t* index, /*!< in: index */ + ulint n, /*!< in: column number */ + ibool inc_prefix); /*!< in: TRUE=consider + column prefixes too */ +/********************************************************************//** Returns TRUE if the index contains a column or a prefix of that column. @return TRUE if contains the column or its prefix */ UNIV_INTERN @@ -1110,18 +1211,6 @@ ulint dict_index_calc_min_rec_len( /*========================*/ const dict_index_t* index); /*!< in: index */ -/*********************************************************************//** -Calculates new estimates for table and index statistics. The statistics -are used in query optimization. */ -UNIV_INTERN -void -dict_update_statistics( -/*===================*/ - dict_table_t* table, /*!< in/out: table */ - ibool only_calc_if_missing_stats);/*!< in: only - update/recalc the stats if they have - not been initialized yet, otherwise - do nothing */ /********************************************************************//** Reserves the dictionary system mutex for MySQL. */ UNIV_INTERN @@ -1192,6 +1281,37 @@ dict_table_get_index_on_name_and_min_id( /*====================================*/ dict_table_t* table, /*!< in: table */ const char* name); /*!< in: name of the index to find */ +/*************************************************************** +Check whether a column exists in an FTS index. */ +UNIV_INLINE +ulint +dict_table_is_fts_column( +/*=====================*/ + /* out: ULINT_UNDEFINED if no match else + the offset within the vector */ + ib_vector_t* indexes,/* in: vector containing only FTS indexes */ + ulint col_no);/* in: col number to search for */ +/**********************************************************************//** +Move a table to the non LRU end of the LRU list. 
*/ +UNIV_INTERN +void +dict_table_move_from_lru_to_non_lru( +/*================================*/ + dict_table_t* table); /*!< in: table to move from LRU to non-LRU */ +/**********************************************************************//** +Move a table to the LRU list from the non-LRU list. */ +UNIV_INTERN +void +dict_table_move_from_non_lru_to_lru( +/*================================*/ + dict_table_t* table); /*!< in: table to move from non-LRU to LRU */ +/**********************************************************************//** +Move to the most recently used segment of the LRU list. */ +UNIV_INTERN +void +dict_move_to_mru( +/*=============*/ + dict_table_t* table); /*!< in: table to move to MRU */ /* Buffers for storing detailed information about the latest foreign key and unique key errors */ extern FILE* dict_foreign_err_file; @@ -1221,8 +1341,6 @@ struct dict_sys_struct{ on name */ hash_table_t* table_id_hash; /*!< hash table of the tables, based on id */ - UT_LIST_BASE_NODE_T(dict_table_t) - table_LRU; /*!< LRU list of tables */ ulint size; /*!< varying space in bytes occupied by the data dictionary table and index objects */ @@ -1230,6 +1348,14 @@ struct dict_sys_struct{ dict_table_t* sys_columns; /*!< SYS_COLUMNS table */ dict_table_t* sys_indexes; /*!< SYS_INDEXES table */ dict_table_t* sys_fields; /*!< SYS_FIELDS table */ + + /*=============================*/ + UT_LIST_BASE_NODE_T(dict_table_t) + table_LRU; /*!< List of tables that can be evicted + from the cache */ + UT_LIST_BASE_NODE_T(dict_table_t) + table_non_LRU; /*!< List of tables that can't be + evicted from the cache */ }; #endif /* !UNIV_HOTBACKUP */ @@ -1245,6 +1371,56 @@ void dict_ind_init(void); /*===============*/ +/* Auxiliary structs for checking a table definition @{ */ + +/* This struct is used to specify the name and type that a column must +have when checking a table's schema. 
*/ +struct dict_col_meta_struct { + const char* name; /* column name */ + ulint mtype; /* required column main type */ + ulint prtype_mask; /* required column precise type mask; + if this is non-zero then all the + bits it has set must also be set + in the column's prtype */ + ulint len; /* required column length */ +}; +typedef struct dict_col_meta_struct dict_col_meta_t; + +/* This struct is used for checking whether a given table exists and +whether it has a predefined schema (number of columns and columns names +and types) */ +struct dict_table_schema_struct { + const char* table_name; /* the name of the table whose + structure we are checking */ + ulint n_cols; /* the number of columns the + table must have */ + dict_col_meta_t* columns; /* metadata for the columns; + this array has n_cols + elements */ +}; +typedef struct dict_table_schema_struct dict_table_schema_t; +/* @} */ + +/*********************************************************************//** +Checks whether a table exists and whether it has the given structure. +The table must have the same number of columns with the same names and +types. The order of the columns does not matter. +The caller must own the dictionary mutex. +dict_table_schema_check() @{ +@return DB_SUCCESS if the table exists and contains the necessary columns */ +UNIV_INTERN +enum db_err +dict_table_schema_check( +/*====================*/ + dict_table_schema_t* req_schema, /*!< in/out: required table + schema */ + char* errstr, /*!< out: human readable error + message if != DB_SUCCESS and + != DB_TABLE_NOT_FOUND is + returned */ + size_t errstr_sz); /*!< in: errstr size */ +/* @} */ + /**********************************************************************//** Closes the data dictionary module. 
*/ UNIV_INTERN diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic index 7533ce01401..f6585ea8205 100644 --- a/storage/innobase/include/dict0dict.ic +++ b/storage/innobase/include/dict0dict.ic @@ -27,6 +27,7 @@ Created 1/8/1996 Heikki Tuuri #ifndef UNIV_HOTBACKUP #include "dict0load.h" #include "rem0types.h" +#include "fsp0fsp.h" #include "srv0srv.h" /*********************************************************************//** @@ -103,7 +104,7 @@ dict_col_type_assert_equal( ut_ad(col->mtype == type->mtype); ut_ad(col->prtype == type->prtype); - ut_ad(col->len == type->len); + //ut_ad(col->len == type->len); # ifndef UNIV_HOTBACKUP ut_ad(col->mbminmaxlen == type->mbminmaxlen); # endif /* !UNIV_HOTBACKUP */ @@ -145,7 +146,7 @@ ulint dict_col_get_fixed_size( /*====================*/ const dict_col_t* col, /*!< in: column */ - ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */ + ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */ { return(dtype_get_fixed_size_low(col->mtype, col->prtype, col->len, col->mbminmaxlen, comp)); @@ -250,7 +251,7 @@ dict_index_is_clust( ut_ad(index); ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - return(UNIV_UNLIKELY(index->type & DICT_CLUSTERED)); + return(index->type & DICT_CLUSTERED); } /********************************************************************//** Check whether the index is unique. @@ -264,7 +265,7 @@ dict_index_is_unique( ut_ad(index); ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - return(UNIV_UNLIKELY(index->type & DICT_UNIQUE)); + return(index->type & DICT_UNIQUE); } /********************************************************************//** @@ -279,7 +280,22 @@ dict_index_is_ibuf( ut_ad(index); ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - return(UNIV_UNLIKELY(index->type & DICT_IBUF)); + return(index->type & DICT_IBUF); +} + +/********************************************************************//** +Check whether the index is an universal index tree. 
+@return nonzero for universal tree, zero for other indexes */ +UNIV_INLINE +ulint +dict_index_is_univ( +/*===============*/ + const dict_index_t* index) /*!< in: index */ +{ + ut_ad(index); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + + return(index->type & DICT_UNIVERSAL); } /********************************************************************//** @@ -298,7 +314,7 @@ dict_index_is_sec_or_ibuf( type = index->type; - return(UNIV_LIKELY(!(type & DICT_CLUSTERED) || (type & DICT_IBUF))); + return(!(type & DICT_CLUSTERED) || (type & DICT_IBUF)); } /********************************************************************//** @@ -420,11 +436,185 @@ dict_table_is_comp( { ut_ad(table); -#if DICT_TF_COMPACT != TRUE -#error +#if DICT_TF_COMPACT != 1 +#error "DICT_TF_COMPACT must be 1" #endif - return(UNIV_LIKELY(table->flags & DICT_TF_COMPACT)); + return(table->flags & DICT_TF_COMPACT); +} + +/************************************************************************ +Check if the table has an FTS index. */ +UNIV_INLINE +ibool +dict_table_has_fts_index( +/*=====================*/ + /* out: TRUE if table has an FTS index */ + dict_table_t* table) /* in: table */ +{ + ut_ad(table); + + return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS)); +} + +/********************************************************************//** +Validate and return the table flags. +@return Same as input after validating it as dict_table_t::flags. +If there is an error, trigger assertion failure. */ +UNIV_INLINE +ulint +dict_tf_validate( +/*=============*/ + ulint flags) /*!< in: table flags */ +{ + ulint compact = DICT_TF_GET_COMPACT(flags); + ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(flags); + ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(flags); + ulint unused = DICT_TF_GET_UNUSED(flags); + + /* Make sure there are no bits that we do not know about. 
*/ + ut_a(unused == 0); + + if (atomic_blobs) { + /* Barracuda row formats COMPRESSED and DYNAMIC build on + the page structure introduced for the COMPACT row format + by allowing keys in secondary indexes to be made from + data stored off-page in the clustered index. */ + ut_a(compact); + } else { + /* Antelope does not support COMPRESSED row format. */ + ut_a(!zip_ssize); + } + + if (zip_ssize) { + /* COMPRESSED row format must have compact and atomic_blobs + bits set. */ + ut_a(compact); + ut_a(atomic_blobs); + + /* Validate the number is within allowed range. */ + ut_a(zip_ssize <= PAGE_ZIP_SSIZE_MAX); + } + + /* Return the flags sent if we did not crash. */ + return(flags); +} + +/********************************************************************//** +Validate a SYS_TABLES TYPE field and return it. +@return Same as input after validating it as a SYS_TABLES TYPE field. +If there is an error, return ULINT_UNDEFINED. */ +UNIV_INLINE +ulint +dict_sys_tables_type_validate( +/*==========================*/ + ulint type, /*!< in: SYS_TABLES.TYPE */ + ulint n_cols) /*!< in: SYS_TABLES.N_COLS */ +{ + ulint low_order_bit = DICT_TF_GET_COMPACT(type); + ulint redundant = !(n_cols & DICT_N_COLS_COMPACT); + ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(type); + ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(type); + ulint unused = DICT_TF_GET_UNUSED(type); + + /* If the format is UNIV_FORMAT_A, table->flags == 0, but + SYS_TABLES.TYPE == 1, which is defined as SYS_TABLE_TYPE_ANTELOPE. + The low order bit of SYS_TABLES.TYPE is always set to 1. + If the format is UNIV_FORMAT_B or higher, this field is the same + as dict_table_t::flags. Zero is not allowed here. 
*/ + if (!low_order_bit) { + return(ULINT_UNDEFINED); + } + + if (redundant) { + /* This is Redundant row format, only the first bit + should be set in SYS_TABLES.TYPE */ + if (type != SYS_TABLE_TYPE_ANTELOPE) { + return(ULINT_UNDEFINED); + } + return(DICT_TF_REDUNDANT); + } + + /* Make sure there are no bits that we do not know about. */ + if (unused) { + return(ULINT_UNDEFINED); + } + + if (atomic_blobs) { + /* Barracuda row formats COMPRESSED and DYNAMIC build on + the page structure introduced for the COMPACT row format + by allowing keys in secondary indexes to be made from + data stored off-page in the clustered index. + + The DICT_N_COLS_COMPACT flag should be in N_COLS, + but we already know that. */ + + } else if (zip_ssize) { + /* Antelope does not support COMPRESSED format. */ + return(ULINT_UNDEFINED); + } + + if (zip_ssize) { + /* COMPRESSED row format must have low_order_bit and + atomic_blobs bits set and the DICT_N_COLS_COMPACT flag + should be in N_COLS, but we already know about the + low_order_bit and DICT_N_COLS_COMPACT flags. */ + if (!atomic_blobs) { + return(ULINT_UNDEFINED); + } + + /* Validate that the number is within allowed range. */ + if (zip_ssize > PAGE_ZIP_SSIZE_MAX) { + return(ULINT_UNDEFINED); + } + } + + /* Return the validated SYS_TABLES.TYPE. */ + return(type); +} + +/********************************************************************//** +Determine the file format from dict_table_t::flags +The low order bit will be zero for REDUNDANT and 1 for COMPACT. For any +other row_format, file_format is > 0 and DICT_TF_COMPACT will also be set. 
+@return file format version */ +UNIV_INLINE +rec_format_t +dict_tf_get_rec_format( +/*===================*/ + ulint flags) /*!< in: dict_table_t::flags */ +{ + dict_tf_validate(flags); + + if (!DICT_TF_GET_COMPACT(flags)) { + return(REC_FORMAT_REDUNDANT); + } + + if (!DICT_TF_HAS_ATOMIC_BLOBS(flags)) { + return(REC_FORMAT_COMPACT); + } + + if (DICT_TF_GET_ZIP_SSIZE(flags)) { + return(REC_FORMAT_COMPRESSED); + } + + return(REC_FORMAT_DYNAMIC); +} + +/********************************************************************//** +Determine the file format from a dict_table_t::flags. +@return file format version */ +UNIV_INLINE +ulint +dict_tf_get_format( +/*===============*/ + ulint flags) /*!< in: dict_table_t::flags */ +{ + if (DICT_TF_HAS_ATOMIC_BLOBS(flags)) { + return(UNIV_FORMAT_B); + } + + return(UNIV_FORMAT_A); } /********************************************************************//** @@ -438,41 +628,109 @@ dict_table_get_format( { ut_ad(table); - return((table->flags & DICT_TF_FORMAT_MASK) >> DICT_TF_FORMAT_SHIFT); + return(dict_tf_get_format(table->flags)); } /********************************************************************//** -Determine the file format of a table. */ +Set the file format and zip size in a dict_table_t::flags. If zip size +is not needed, it should be 0. 
*/ UNIV_INLINE void -dict_table_set_format( -/*==================*/ - dict_table_t* table, /*!< in/out: table */ - ulint format) /*!< in: file format version */ +dict_tf_set( +/*========*/ + ulint* flags, /*!< in/out: table flags */ + rec_format_t format, /*!< in: file format */ + ulint zip_ssize) /*!< in: zip shift size */ { - ut_ad(table); + switch (format) { + case REC_FORMAT_REDUNDANT: + *flags = 0; + ut_ad(zip_ssize == 0); + break; + case REC_FORMAT_COMPACT: + *flags = DICT_TF_COMPACT; + ut_ad(zip_ssize == 0); + break; + case REC_FORMAT_COMPRESSED: + *flags = DICT_TF_COMPACT + | (1 << DICT_TF_POS_ATOMIC_BLOBS) + | (zip_ssize << DICT_TF_POS_ZIP_SSIZE); + break; + case REC_FORMAT_DYNAMIC: + *flags = DICT_TF_COMPACT + | (1 << DICT_TF_POS_ATOMIC_BLOBS); + ut_ad(zip_ssize == 0); + break; + } +} + +/********************************************************************//** +Convert a 32 bit integer table flags to the 32 bit integer that is +written into the tablespace header at the offset FSP_SPACE_FLAGS and is +also stored in the fil_space_t::flags field. The following chart shows +the translation of the low order bit. Other bits are the same. +========================= Low order bit ========================== + | REDUNDANT | COMPACT | COMPRESSED | DYNAMIC +dict_table_t::flags | 0 | 1 | 1 | 1 +fil_space_t::flags | 0 | 0 | 1 | 1 +================================================================== +@return tablespace flags (fil_space_t::flags) */ +UNIV_INLINE +ulint +dict_tf_to_fsp_flags( +/*=================*/ + ulint flags) /*!< in: dict_table_t::flags */ +{ + /* Adjust bit zero. */ + flags = (flags == DICT_TF_COMPACT) ? 0 : flags; + + /* In addition, tablespace flags also contain the page size. 
*/ + flags = fsp_flags_set_page_size(flags, UNIV_PAGE_SIZE); - table->flags = (table->flags & ~DICT_TF_FORMAT_MASK) - | (format << DICT_TF_FORMAT_SHIFT); + return(fsp_flags_validate(flags)); } /********************************************************************//** -Extract the compressed page size from table flags. +Convert a 32 bit integer table flags to the 32bit integer that is written +to a SYS_TABLES.TYPE field. The following chart shows the translation of +the low order bit. Other bits are the same. +========================= Low order bit ========================== + | REDUNDANT | COMPACT | COMPRESSED and DYNAMIC +dict_table_t::flags | 0 | 1 | 1 +SYS_TABLES.TYPE | 1 | 1 | 1 +================================================================== +@return ulint containing SYS_TABLES.TYPE */ +UNIV_INLINE +ulint +dict_tf_to_sys_tables_type( +/*=======================*/ + ulint flags) /*!< in: dict_table_t::flags */ +{ + if (!DICT_TF_HAS_ATOMIC_BLOBS(flags)) { + ut_a(flags == DICT_TF_REDUNDANT + || flags == DICT_TF_COMPACT); + return(SYS_TABLE_TYPE_ANTELOPE); + } + + return(dict_tf_validate(flags)); +} + +/********************************************************************//** +Extract the compressed page size from dict_table_t::flags. +These flags are in memory, so assert that they are valid. @return compressed page size, or 0 if not compressed */ UNIV_INLINE ulint -dict_table_flags_to_zip_size( -/*=========================*/ +dict_tf_get_zip_size( +/*=================*/ ulint flags) /*!< in: flags */ { - ulint zip_size = flags & DICT_TF_ZSSIZE_MASK; + ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(flags); + ulint zip_size = (zip_ssize + ? 
(UNIV_ZIP_SIZE_MIN >> 1) << zip_ssize + : 0); - if (UNIV_UNLIKELY(zip_size)) { - zip_size = ((PAGE_ZIP_MIN_SIZE >> 1) - << (zip_size >> DICT_TF_ZSSIZE_SHIFT)); - - ut_ad(zip_size <= UNIV_PAGE_SIZE); - } + ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX); return(zip_size); } @@ -488,9 +746,10 @@ dict_table_zip_size( { ut_ad(table); - return(dict_table_flags_to_zip_size(table->flags)); + return(dict_tf_get_zip_size(table->flags)); } +#ifndef UNIV_HOTBACKUP /*********************************************************************//** Obtain exclusive locks on all index trees of the table. This is to prevent accessing index trees while InnoDB is updating internal metadata for @@ -533,6 +792,8 @@ dict_table_x_unlock_indexes( rw_lock_x_unlock(dict_index_get_lock(index)); } } +#endif /* !UNIV_HOTBACKUP */ + /********************************************************************//** Gets the number of fields in the internal representation of an index, including fields added by the dictionary system. @@ -642,7 +903,7 @@ dict_index_get_sys_col_pos( { ut_ad(index); ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - ut_ad(!(index->type & DICT_UNIVERSAL)); + ut_ad(!dict_index_is_univ(index)); if (dict_index_is_clust(index)) { @@ -695,6 +956,20 @@ dict_index_get_nth_col_no( return(dict_col_get_no(dict_index_get_nth_col(index, pos))); } +/********************************************************************//** +Looks for column n in an index. +@return position in internal representation of the index; +ULINT_UNDEFINED if not contained */ +UNIV_INLINE +ulint +dict_index_get_nth_col_pos( +/*=======================*/ + const dict_index_t* index, /*!< in: index */ + ulint n) /*!< in: column number */ +{ + return(dict_index_get_nth_col_or_prefix_pos(index, n, FALSE)); +} + #ifndef UNIV_HOTBACKUP /********************************************************************//** Returns the minimum data size of an index record. 
@@ -790,129 +1065,35 @@ dict_index_get_space_reserve(void) } /**********************************************************************//** -Checks if a table is in the dictionary cache. -@return table, NULL if not found */ +Check whether a column exists in an FTS index. +@return ULINT_UNDEFINED if no match else the offset within the vector */ UNIV_INLINE -dict_table_t* -dict_table_check_if_in_cache_low( -/*=============================*/ - const char* table_name) /*!< in: table name */ -{ - dict_table_t* table; - ulint table_fold; - - ut_ad(table_name); - ut_ad(mutex_own(&(dict_sys->mutex))); - - /* Look for the table name in the hash table */ - table_fold = ut_fold_string(table_name); - - HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold, - dict_table_t*, table, ut_ad(table->cached), - !strcmp(table->name, table_name)); - return(table); -} +ulint +dict_table_is_fts_column( +/*=====================*/ + ib_vector_t* indexes,/*!< in: vector containing only FTS indexes */ + ulint col_no) /*!< in: col number to search for */ -/**********************************************************************//** -load a table into dictionary cache, ignore any error specified during load; -@return table, NULL if not found */ -UNIV_INLINE -dict_table_t* -dict_table_get_low_ignore_err( -/*==========================*/ - const char* table_name, /*!< in: table name */ - dict_err_ignore_t - ignore_err) /*!< in: error to be ignored when - loading a table definition */ { - dict_table_t* table; + ulint i; - ut_ad(table_name); - ut_ad(mutex_own(&(dict_sys->mutex))); + for (i = 0; i < ib_vector_size(indexes); ++i) { + dict_index_t* index; - table = dict_table_check_if_in_cache_low(table_name); - - if (table == NULL) { - table = dict_load_table(table_name, TRUE, ignore_err); - } + index = (dict_index_t*) ib_vector_getp(indexes, i); - ut_ad(!table || table->cached); - - return(table); -} - -/**********************************************************************//** -Gets a table; loads it 
to the dictionary cache if necessary. A low-level -function. -@return table, NULL if not found */ -UNIV_INLINE -dict_table_t* -dict_table_get_low( -/*===============*/ - const char* table_name) /*!< in: table name */ -{ - dict_table_t* table; - - ut_ad(table_name); - ut_ad(mutex_own(&(dict_sys->mutex))); + if (dict_index_contains_col_or_prefix(index, col_no)) { - table = dict_table_check_if_in_cache_low(table_name); - - if (table && table->corrupted) { - fprintf(stderr, "InnoDB: table"); - ut_print_name(stderr, NULL, TRUE, table->name); - if (srv_load_corrupted) { - fputs(" is corrupted, but" - " innodb_force_load_corrupted is set\n", stderr); - } else { - fputs(" is corrupted\n", stderr); - return(NULL); + return(i); } } - if (table == NULL) { - table = dict_load_table(table_name, TRUE, DICT_ERR_IGNORE_NONE); - } - - ut_ad(!table || table->cached); - - return(table); -} - -/**********************************************************************//** -Returns a table object based on table id. -@return table, NULL if does not exist */ -UNIV_INLINE -dict_table_t* -dict_table_get_on_id_low( -/*=====================*/ - table_id_t table_id) /*!< in: table id */ -{ - dict_table_t* table; - ulint fold; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - /* Look for the table name in the hash table */ - fold = ut_fold_ull(table_id); - - HASH_SEARCH(id_hash, dict_sys->table_id_hash, fold, - dict_table_t*, table, ut_ad(table->cached), - table->id == table_id); - if (table == NULL) { - table = dict_load_table_on_id(table_id); - } - - ut_ad(!table || table->cached); - - /* TODO: should get the type information from MySQL */ - - return(table); + return(ULINT_UNDEFINED); } /**********************************************************************//** Determine bytes of column prefix to be stored in the undo log. 
Please -note if the table format is UNIV_FORMAT_A (< DICT_TF_FORMAT_ZIP), no prefix +note if the table format is UNIV_FORMAT_A (< UNIV_FORMAT_B), no prefix needs to be stored in the undo log. @return bytes of column prefix to be stored in the undo log */ UNIV_INLINE @@ -923,9 +1104,9 @@ dict_max_field_len_store_undo( const dict_col_t* col) /*!< in: column which index prefix is based on */ { - ulint prefix_len = 0; + ulint prefix_len = 0; - if (dict_table_get_format(table) >= DICT_TF_FORMAT_ZIP) + if (dict_table_get_format(table) >= UNIV_FORMAT_B) { prefix_len = col->max_prefix ? col->max_prefix @@ -947,7 +1128,7 @@ dict_table_is_corrupted( ut_ad(table); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - return(UNIV_UNLIKELY(table->corrupted)); + return(table->corrupted); } /********************************************************************//** @@ -962,8 +1143,8 @@ dict_index_is_corrupted( ut_ad(index); ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - return(UNIV_UNLIKELY((index->type & DICT_CORRUPT) - || (index->table && index->table->corrupted))); + return((index->type & DICT_CORRUPT) + || (index->table && index->table->corrupted)); } #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/include/dict0load.h b/storage/innobase/include/dict0load.h index 16177ade713..13b9a121c1c 100644 --- a/storage/innobase/include/dict0load.h +++ b/storage/innobase/include/dict0load.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -48,7 +48,7 @@ enum dict_system_table_id { typedef enum dict_system_table_id dict_system_id_t; -/** Status bit for dict_process_sys_tables_rec() */ +/** Status bit for dict_process_sys_tables_rec_and_mtr_commit() */ enum dict_table_info { DICT_TABLE_LOAD_FROM_RECORD = 0,/*!< Directly populate a dict_table_t structure with information from @@ -155,12 +155,7 @@ dict_load_field_low( byte* last_index_id, /*!< in: last index id */ mem_heap_t* heap, /*!< in/out: memory heap for temporary storage */ - const rec_t* rec, /*!< in: SYS_FIELDS record */ - char* addition_err_str,/*!< out: additional error message - that requires information to be - filled, or NULL */ - ulint err_str_len); /*!< in: length of addition_err_str - in bytes */ + const rec_t* rec); /*!< in: SYS_FIELDS record */ /********************************************************************//** Loads a table definition and also all its index definitions, and also the cluster definition if the table is a member in a cluster. Also loads @@ -248,15 +243,17 @@ both monitor table output and information schema innodb_sys_tables output. 
@return error message, or NULL on success */ UNIV_INTERN const char* -dict_process_sys_tables_rec( -/*========================*/ +dict_process_sys_tables_rec_and_mtr_commit( +/*=======================================*/ mem_heap_t* heap, /*!< in: temporary memory heap */ const rec_t* rec, /*!< in: SYS_TABLES record */ dict_table_t** table, /*!< out: dict_table_t to fill */ - dict_table_info_t status); /*!< in: status bit controls + dict_table_info_t status, /*!< in: status bit controls options such as whether we shall look for dict_table_t from cache first */ + mtr_t* mtr); /*!< in/out: mini-transaction, + will be committed */ /********************************************************************//** This function parses a SYS_INDEXES record and populate a dict_index_t structure with the information from the record. For detail information diff --git a/storage/innobase/include/dict0load.ic b/storage/innobase/include/dict0load.ic index ccc16db165b..2c0f1ff38a5 100644 --- a/storage/innobase/include/dict0load.ic +++ b/storage/innobase/include/dict0load.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index 4c371c8d5cf..b770f7e3ca7 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -43,6 +43,10 @@ Created 1/8/1996 Heikki Tuuri #include "ut0byte.h" #include "hash0hash.h" #include "trx0types.h" +#include "fts0fts.h" + +/* Forward declaration. */ +typedef struct ib_rbt_struct ib_rbt_t; /** Type flags of an index: OR'ing of the flags is allowed to define a combination of types */ @@ -54,73 +58,148 @@ combination of types */ #define DICT_IBUF 8 /*!< insert buffer tree */ #define DICT_CORRUPT 16 /*!< bit to store the corrupted flag in SYS_INDEXES.TYPE */ +#define DICT_FTS 32 /* FTS index; can't be combined with the + other flags */ -#define DICT_IT_BITS 5 /*!< number of bits used for +#define DICT_IT_BITS 6 /*!< number of bits used for SYS_INDEXES.TYPE */ /* @} */ +#if 0 /* not implemented, retained for history */ /** Types for a table object */ #define DICT_TABLE_ORDINARY 1 /*!< ordinary table */ -#if 0 /* not implemented */ #define DICT_TABLE_CLUSTER_MEMBER 2 #define DICT_TABLE_CLUSTER 3 /* this means that the table is really a cluster definition */ #endif -/** Table flags. All unused bits must be 0. */ -/* @{ */ -#define DICT_TF_COMPACT 1 /* Compact page format. - This must be set for - new file formats - (later than - DICT_TF_FORMAT_51). */ +/* Table and tablespace flags are generally not used for the Antelope file +format except for the low order bit, which is used differently depending on +where the flags are stored. 
-/** Compressed page size (0=uncompressed, up to 15 compressed sizes) */ -/* @{ */ -#define DICT_TF_ZSSIZE_SHIFT 1 -#define DICT_TF_ZSSIZE_MASK (15 << DICT_TF_ZSSIZE_SHIFT) -#define DICT_TF_ZSSIZE_MAX (UNIV_PAGE_SIZE_SHIFT - PAGE_ZIP_MIN_SIZE_SHIFT + 1) -/* @} */ +==================== Low order flags bit ========================= + | REDUNDANT | COMPACT | COMPRESSED and DYNAMIC +SYS_TABLES.TYPE | 1 | 1 | 1 +dict_table_t::flags | 0 | 1 | 1 +FSP_SPACE_FLAGS | 0 | 0 | 1 +fil_space_t::flags | 0 | 0 | 1 -/** File format */ -/* @{ */ -#define DICT_TF_FORMAT_SHIFT 5 /* file format */ -#define DICT_TF_FORMAT_MASK \ -((~(~0 << (DICT_TF_BITS - DICT_TF_FORMAT_SHIFT))) << DICT_TF_FORMAT_SHIFT) -#define DICT_TF_FORMAT_51 0 /*!< InnoDB/MySQL up to 5.1 */ -#define DICT_TF_FORMAT_ZIP 1 /*!< InnoDB plugin for 5.1: - compressed tables, - new BLOB treatment */ -/** Maximum supported file format */ -#define DICT_TF_FORMAT_MAX DICT_TF_FORMAT_ZIP - -/** Minimum supported file format */ -#define DICT_TF_FORMAT_MIN DICT_TF_FORMAT_51 +Before the 5.1 plugin, SYS_TABLES.TYPE was always DICT_TABLE_ORDINARY (1) +and the tablespace flags field was always 0. In the 5.1 plugin, these fields +were repurposed to identify compressed and dynamic row formats. -/* @} */ -#define DICT_TF_BITS 6 /*!< number of flag bits */ -#if (1 << (DICT_TF_BITS - DICT_TF_FORMAT_SHIFT)) <= DICT_TF_FORMAT_MAX -# error "DICT_TF_BITS is insufficient for DICT_TF_FORMAT_MAX" -#endif +The following types and constants describe the flags found in dict_table_t +and SYS_TABLES.TYPE. Similar flags found in fil_space_t and FSP_SPACE_FLAGS +are described in fsp0fsp.h. */ + +/* @{ */ +/** SYS_TABLES.TYPE can be equal to 1 which means that the Row format +is one of two Antelope row formats, Redundant or Compact. */ +#define SYS_TABLE_TYPE_ANTELOPE 1 +/** dict_table_t::flags can be equal to 0 if the row format = Redundant */ +#define DICT_TF_REDUNDANT 0 /*!< Redundant row format. 
*/ +/** dict_table_t::flags can be equal to 1 if the row format = Compact */ +#define DICT_TF_COMPACT 1 /*!< Compact row format. */ + +/** This bitmask is used in SYS_TABLES.N_COLS to set and test whether +the Compact page format is used, i.e. ROW_FORMAT != REDUNDANT */ +#define DICT_N_COLS_COMPACT 0x80000000UL + +/** Width of the COMPACT flag */ +#define DICT_TF_WIDTH_COMPACT 1 +/** Width of the ZIP_SSIZE flag */ +#define DICT_TF_WIDTH_ZIP_SSIZE 4 +/** Width of the ATOMIC_BLOBS flag. The Antelope file formats broke up +BLOB and TEXT fields, storing the first 768 bytes in the clustered index. +Barracuda row formats store the whole blob or text field off-page atomically. +Secondary indexes are created from this external data using row_ext_t +to cache the BLOB prefixes. */ +#define DICT_TF_WIDTH_ATOMIC_BLOBS 1 +/** Width of all the currently known table flags */ +#define DICT_TF_BITS (DICT_TF_WIDTH_COMPACT \ + + DICT_TF_WIDTH_ZIP_SSIZE \ + + DICT_TF_WIDTH_ATOMIC_BLOBS) + +/** A mask of all the known/used bits in table flags */ +#define DICT_TF_BIT_MASK (~(~0 << DICT_TF_BITS)) + +/** Zero relative shift position of the COMPACT field */ +#define DICT_TF_POS_COMPACT 0 +/** Zero relative shift position of the ZIP_SSIZE field */ +#define DICT_TF_POS_ZIP_SSIZE (DICT_TF_POS_COMPACT \ + + DICT_TF_WIDTH_COMPACT) +/** Zero relative shift position of the ATOMIC_BLOBS field */ +#define DICT_TF_POS_ATOMIC_BLOBS (DICT_TF_POS_ZIP_SSIZE \ + + DICT_TF_WIDTH_ZIP_SSIZE) +/** Zero relative shift position of the start of the UNUSED bits */ +#define DICT_TF_POS_UNUSED (DICT_TF_POS_ATOMIC_BLOBS \ + + DICT_TF_WIDTH_ATOMIC_BLOBS) + +/** Bit mask of the COMPACT field */ +#define DICT_TF_MASK_COMPACT \ + ((~(~0 << DICT_TF_WIDTH_COMPACT)) \ + << DICT_TF_POS_COMPACT) +/** Bit mask of the ZIP_SSIZE field */ +#define DICT_TF_MASK_ZIP_SSIZE \ + ((~(~0 << DICT_TF_WIDTH_ZIP_SSIZE)) \ + << DICT_TF_POS_ZIP_SSIZE) +/** Bit mask of the ATOMIC_BLOBS field */ +#define DICT_TF_MASK_ATOMIC_BLOBS \ + ((~(~0 <<
DICT_TF_WIDTH_ATOMIC_BLOBS)) \ + << DICT_TF_POS_ATOMIC_BLOBS) + +/** Return the value of the COMPACT field */ +#define DICT_TF_GET_COMPACT(flags) \ + ((flags & DICT_TF_MASK_COMPACT) \ + >> DICT_TF_POS_COMPACT) +/** Return the value of the ZIP_SSIZE field */ +#define DICT_TF_GET_ZIP_SSIZE(flags) \ + ((flags & DICT_TF_MASK_ZIP_SSIZE) \ + >> DICT_TF_POS_ZIP_SSIZE) +/** Return the value of the ATOMIC_BLOBS field */ +#define DICT_TF_HAS_ATOMIC_BLOBS(flags) \ + ((flags & DICT_TF_MASK_ATOMIC_BLOBS) \ + >> DICT_TF_POS_ATOMIC_BLOBS) +/** Return the contents of the UNUSED bits */ +#define DICT_TF_GET_UNUSED(flags) \ + (flags >> DICT_TF_POS_UNUSED) /* @} */ -/** @brief Additional table flags. +/** @brief Table Flags set number 2. These flags will be stored in SYS_TABLES.MIX_LEN. All unused flags will be written as 0. The column may contain garbage for tables created with old versions of InnoDB that only implemented -ROW_FORMAT=REDUNDANT. */ +ROW_FORMAT=REDUNDANT. InnoDB engines do not check these flags +for unknown bits in order to preserve backward compatibility. */ /* @{ */ -#define DICT_TF2_SHIFT DICT_TF_BITS - /*!< Shift value for - table->flags. */ -#define DICT_TF2_TEMPORARY 1 /*!< TRUE for tables from - CREATE TEMPORARY TABLE. */ -#define DICT_TF2_BITS (DICT_TF2_SHIFT + 1) - /*!< Total number of bits - in table->flags. */ +/** Total number of bits in table->flags2. */ +#define DICT_TF2_BITS 5 +#define DICT_TF2_BIT_MASK ~(~0 << DICT_TF2_BITS) + +/** TEMPORARY; TRUE for tables from CREATE TEMPORARY TABLE. */ +#define DICT_TF2_TEMPORARY 1 +/** The table has an internal defined DOC ID column */ +#define DICT_TF2_FTS_HAS_DOC_ID 2 +/** The table has an FTS index */ +#define DICT_TF2_FTS 4 +/** Need to add Doc ID column for FTS index build. +This is a transient bit for index build */ +#define DICT_TF2_FTS_ADD_DOC_ID 8 +/** This bit is used during table creation to indicate that it will +use its own tablespace instead of the system tablespace.
*/ +#define DICT_TF2_USE_TABLESPACE 16 /* @} */ +#define DICT_TF2_FLAG_SET(table, flag) \ + (table->flags2 |= (flag)) + +#define DICT_TF2_FLAG_IS_SET(table, flag) \ + (table->flags2 & (flag)) + +#define DICT_TF2_FLAG_UNSET(table, flag) \ + (table->flags2 &= ~(flag)) + /** Tables could be chained together with Foreign key constraint. When first load the parent table, we would load all of its descedents. This could result in rescursive calls and out of stack error eventually. @@ -150,7 +229,8 @@ dict_mem_table_create( is ignored if the table is made a member of a cluster */ ulint n_cols, /*!< in: number of columns */ - ulint flags); /*!< in: table flags */ + ulint flags, /*!< in: table flags */ + ulint flags2); /*!< in: table flags2 */ /****************************************************************//** Free a table memory object. */ UNIV_INTERN @@ -273,14 +353,14 @@ struct dict_col_struct{ /** The following are copied from dtype_t, so that all bit-fields can be packed tightly. */ /* @{ */ - unsigned mtype:8; /*!< main data type */ - unsigned prtype:24; /*!< precise type; MySQL data + unsigned prtype:32; /*!< precise type; MySQL data type, charset code, flags to indicate nullability, signedness, whether this is a binary string, whether this is a true VARCHAR where MySQL uses 2 bytes to store the length */ + unsigned mtype:8; /*!< main data type */ /* the remaining fields do not affect alphabetical ordering: */ @@ -327,17 +407,16 @@ files would be at risk! */ /** Find out maximum indexed column length by its table format. For ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT, the maximum -field length is REC_ANTELOPE_MAX_INDEX_COL_LEN - 1 (767). For new -barracuda format, the length could be REC_VERSION_56_MAX_INDEX_COL_LEN -(3072) bytes */ +field length is REC_ANTELOPE_MAX_INDEX_COL_LEN - 1 (767). 
For +Barracuda row formats COMPRESSED and DYNAMIC, the length could +be REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes */ #define DICT_MAX_FIELD_LEN_BY_FORMAT(table) \ - ((dict_table_get_format(table) < DICT_TF_FORMAT_ZIP) \ + ((dict_table_get_format(table) < UNIV_FORMAT_B) \ ? (REC_ANTELOPE_MAX_INDEX_COL_LEN - 1) \ : REC_VERSION_56_MAX_INDEX_COL_LEN) #define DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags) \ - ((((flags & DICT_TF_FORMAT_MASK) >> DICT_TF_FORMAT_SHIFT)\ - < DICT_TF_FORMAT_ZIP) \ + ((DICT_TF_HAS_ATOMIC_BLOBS(flags) < UNIV_FORMAT_B) \ ? (REC_ANTELOPE_MAX_INDEX_COL_LEN - 1) \ : REC_VERSION_56_MAX_INDEX_COL_LEN) @@ -407,14 +486,19 @@ struct dict_index_struct{ /*----------------------*/ /** Statistics for query optimization */ /* @{ */ - ib_int64_t* stat_n_diff_key_vals; + ib_uint64_t* stat_n_diff_key_vals; /*!< approximate number of different key values for this index, for each n-column prefix where n <= dict_get_n_unique(index); we periodically calculate new estimates */ - ib_int64_t* stat_n_non_null_key_vals; + ib_uint64_t* stat_n_sample_sizes; + /*!< number of pages that were sampled + to calculate each of stat_n_diff_key_vals[], + e.g. stat_n_sample_sizes[3] pages were sampled + to get the number stat_n_diff_key_vals[3]. */ + ib_uint64_t* stat_n_non_null_key_vals; /* approximate number of non-null key values for this index, for each column where n < dict_get_n_unique(index); This @@ -436,7 +520,7 @@ struct dict_index_struct{ #ifdef UNIV_BLOB_DEBUG mutex_t blobs_mutex; /*!< mutex protecting blobs */ - void* blobs; /*!< map of (page_no,heap_no,field_no) + ib_rbt_t* blobs; /*!< map of (page_no,heap_no,field_no) to first_blob_page_no; protected by blobs_mutex; @see btr_blob_dbg_t */ #endif /* UNIV_BLOB_DEBUG */ @@ -501,7 +585,6 @@ a foreign key constraint is enforced, therefore RESTRICT just means no flag */ #define DICT_FOREIGN_ON_UPDATE_NO_ACTION 32 /*!< ON UPDATE NO ACTION */ /* @} */ - /** Data structure for a database table. 
Most fields will be initialized to 0, NULL or FALSE in dict_mem_table_create(). */ struct dict_table_struct{ @@ -517,7 +600,8 @@ struct dict_table_struct{ unsigned space:32; /*!< space where the clustered index of the table is placed */ - unsigned flags:DICT_TF2_BITS;/*!< DICT_TF_COMPACT, ... */ + unsigned flags:DICT_TF_BITS; /*!< DICT_TF_... */ + unsigned flags2:DICT_TF2_BITS; /*!< DICT_TF2_... */ unsigned ibd_file_missing:1; /*!< TRUE if this is in a single-table tablespace and the .ibd file is missing; then @@ -532,6 +616,9 @@ struct dict_table_struct{ to the dictionary cache */ unsigned n_def:10;/*!< number of columns defined so far */ unsigned n_cols:10;/*!< number of columns */ + unsigned can_be_evicted:1; + /*!< TRUE if it's not an InnoDB system table + or a table that has no FK relationships */ unsigned corrupted:1; /*!< TRUE if table is corrupted */ dict_col_t* cols; /*!< array of column descriptions */ @@ -555,12 +642,6 @@ struct dict_table_struct{ which refer to this table */ UT_LIST_NODE_T(dict_table_t) table_LRU; /*!< node of the LRU list of tables */ - ulint n_mysql_handles_opened; - /*!< count of how many handles MySQL has opened - to this table; dropping of the table is - NOT allowed until this count gets to zero; - MySQL does NOT itself check the number of - open handles at drop */ unsigned fk_max_recusive_level:8; /*!< maximum recursive level we support when loading tables chained together with FK @@ -581,8 +662,6 @@ struct dict_table_struct{ with undo logs commits, it sets this to the value of the trx id counter for the tables it had an IX lock on */ - UT_LIST_BASE_NODE_T(lock_t) - locks; /*!< list of locks on the table */ #ifdef UNIV_DEBUG /*----------------------*/ ibool does_not_fit_in_memory; @@ -637,8 +716,8 @@ struct dict_table_struct{ whether a transaction has locked the AUTOINC lock we keep a pointer to the transaction here in the autoinc_trx variable. This is to - avoid acquiring the kernel mutex and scanning - the vector in trx_t. 
+ avoid acquiring the lock_sys_t::mutex and + scanning the vector in trx_t. When an AUTOINC lock has to wait, the corresponding lock instance is created on @@ -662,16 +741,32 @@ struct dict_table_struct{ /*!< This counter is used to track the number of granted and pending autoinc locks on this table. This value is set after acquiring the - kernel mutex but we peek the contents to + lock_sys_t::mutex but we peek the contents to determine whether other transactions have acquired the AUTOINC lock or not. Of course only one transaction can be granted the lock but there can be multiple waiters. */ - const trx_t* autoinc_trx; + const trx_t* autoinc_trx; /*!< The transaction that currently holds the - the AUTOINC lock on this table. */ + the AUTOINC lock on this table. + Protected by lock_sys->mutex. */ + fts_t* fts; /* FTS specific state variables */ /* @} */ /*----------------------*/ + ulint n_rec_locks; + /*!< Count of the number of record locks on + this table. We use this to determine whether + we can evict the table from the dictionary + cache. It is protected by lock_sys->mutex. */ + ulint n_ref_count; + /*!< count of how many handles are opened + to this table; dropping of the table is + NOT allowed until this count gets to zero; + MySQL does NOT itself check the number of + open handles at drop */ + UT_LIST_BASE_NODE_T(lock_t) + locks; /*!< list of locks on the table; protected + by lock_sys->mutex */ #endif /* !UNIV_HOTBACKUP */ #ifdef UNIV_DEBUG diff --git a/storage/innobase/include/dict0mem.ic b/storage/innobase/include/dict0mem.ic index 1d80ffc9b94..38d51f61789 100644 --- a/storage/innobase/include/dict0mem.ic +++ b/storage/innobase/include/dict0mem.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -57,16 +57,18 @@ dict_mem_fill_index_struct( index->fields = NULL; } - index->type = type; + /* Assign a ulint to a narrower bit-field (presumably DICT_IT_BITS wide). + Only the low-order bits that fit in the field are assigned. */ + index->type = type; #ifndef UNIV_HOTBACKUP - index->space = (unsigned int) space; - index->page = FIL_NULL; + index->space = (unsigned int) space; + index->page = FIL_NULL; #endif /* !UNIV_HOTBACKUP */ - index->table_name = table_name; - index->n_fields = (unsigned int) n_fields; - /* The '1 +' above prevents allocation - of an empty mem block */ + index->table_name = table_name; + index->n_fields = (unsigned int) n_fields; + /* The '1 +' above prevents allocation + of an empty mem block */ #ifdef UNIV_DEBUG - index->magic_n = DICT_INDEX_MAGIC_N; + index->magic_n = DICT_INDEX_MAGIC_N; #endif /* UNIV_DEBUG */ } diff --git a/storage/innobase/include/dict0priv.h b/storage/innobase/include/dict0priv.h new file mode 100644 index 00000000000..69eeb835885 --- /dev/null +++ b/storage/innobase/include/dict0priv.h @@ -0,0 +1,61 @@ +/***************************************************************************** + +Copyright (c) 2010, Oracle and/or its affiliates. All Rights Reserved.
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file include/dict0priv.h +Data dictionary private functions + +Created Fri 2 Jul 2010 13:30:38 EST - Sunny Bains +*******************************************************/ + +#ifndef dict0priv_h +#define dict0priv_h + +/**********************************************************************//** +Gets a table; loads it to the dictionary cache if necessary. A low-level +function. Note: Not to be called from outside dict0*c functions. +@return table, NULL if not found */ +UNIV_INLINE +dict_table_t* +dict_table_get_low( +/*===============*/ + const char* table_name); /*!< in: table name */ + +/**********************************************************************//** +Checks if a table is in the dictionary cache. +@return table, NULL if not found */ +UNIV_INLINE +dict_table_t* +dict_table_check_if_in_cache_low( +/*=============================*/ + const char* table_name); /*!< in: table name */ + +/**********************************************************************//** +Returns a table object based on table id. 
+@return table, NULL if does not exist */ +UNIV_INLINE +dict_table_t* +dict_table_open_on_id_low( +/*=====================*/ + table_id_t table_id); /*!< in: table id */ + +#ifndef UNIV_NONINL +#include "dict0priv.ic" +#endif + +#endif /* dict0priv.h */ diff --git a/storage/innobase/include/dict0priv.ic b/storage/innobase/include/dict0priv.ic new file mode 100644 index 00000000000..e15fbc65a63 --- /dev/null +++ b/storage/innobase/include/dict0priv.ic @@ -0,0 +1,123 @@ +/***************************************************************************** + +Copyright (c) 2010, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/dict0priv.ic +Data dictionary system private include file + +Created Wed 13 Oct 2010 16:10:14 EST Sunny Bains +***********************************************************************/ + +#include "dict0dict.h" +#include "dict0load.h" +#include "dict0priv.h" +#ifndef UNIV_HOTBACKUP + +/**********************************************************************//** +Gets a table; loads it to the dictionary cache if necessary. A low-level +function. 
+@return table, NULL if not found */ +UNIV_INLINE +dict_table_t* +dict_table_get_low( +/*===============*/ + const char* table_name) /*!< in: table name */ +{ + dict_table_t* table; + + ut_ad(table_name); + ut_ad(mutex_own(&(dict_sys->mutex))); + + table = dict_table_check_if_in_cache_low(table_name); + + if (table && table->corrupted) { + fprintf(stderr, "InnoDB: table"); + ut_print_name(stderr, NULL, TRUE, table->name); + if (srv_load_corrupted) { + fputs(" is corrupted, but" + " innodb_force_load_corrupted is set\n", stderr); + } else { + fputs(" is corrupted\n", stderr); + return(NULL); + } + } + + if (table == NULL) { + table = dict_load_table(table_name, TRUE, DICT_ERR_IGNORE_NONE); + } + + ut_ad(!table || table->cached); + + return(table); +} + +/**********************************************************************//** +Returns a table object based on table id. +@return table, NULL if does not exist */ +UNIV_INLINE +dict_table_t* +dict_table_open_on_id_low( +/*======================*/ + table_id_t table_id) /*!< in: table id */ +{ + dict_table_t* table; + ulint fold; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + /* Look for the table id in the hash table */ + fold = ut_fold_ull(table_id); + + HASH_SEARCH(id_hash, dict_sys->table_id_hash, fold, + dict_table_t*, table, ut_ad(table->cached), + table->id == table_id); + if (table == NULL) { + table = dict_load_table_on_id(table_id); + } + + ut_ad(!table || table->cached); + + /* TODO: should get the type information from MySQL */ + + return(table); +} + +/**********************************************************************//** +Checks if a table is in the dictionary cache.
+@return table, NULL if not found */ +UNIV_INLINE +dict_table_t* +dict_table_check_if_in_cache_low( +/*=============================*/ + const char* table_name) /*!< in: table name */ +{ + dict_table_t* table; + ulint table_fold; + + ut_ad(table_name); + ut_ad(mutex_own(&(dict_sys->mutex))); + + /* Look for the table name in the hash table */ + table_fold = ut_fold_string(table_name); + + HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold, + dict_table_t*, table, ut_ad(table->cached), + !strcmp(table->name, table_name)); + return(table); +} +#endif /*! UNIV_HOTBACKUP */ diff --git a/storage/innobase/include/dict0stats.h b/storage/innobase/include/dict0stats.h new file mode 100644 index 00000000000..879e67a0918 --- /dev/null +++ b/storage/innobase/include/dict0stats.h @@ -0,0 +1,108 @@ +/***************************************************************************** + +Copyright (c) 2009, 2010, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file include/dict0stats.h +Code used for calculating and manipulating table statistics. 
+ +Created Jan 06, 2010 Vasil Dimov +*******************************************************/ + +#ifndef dict0stats_h +#define dict0stats_h + +#include "univ.i" + +#include "db0err.h" +#include "dict0types.h" +#include "trx0types.h" + +enum dict_stats_upd_option { + DICT_STATS_RECALC_PERSISTENT,/* (re) calculate the + statistics using a precise and slow + algo and save them to the persistent + storage, if the persistent storage is + not present then emit a warning and + fall back to transient stats */ + DICT_STATS_RECALC_PERSISTENT_SILENT,/* same as + DICT_STATS_RECALC_PERSISTENT + but do not emit a warning */ + DICT_STATS_RECALC_TRANSIENT,/* (re) calculate the statistics + using an imprecise quick algo + without saving the results + persistently */ + DICT_STATS_FETCH, /* fetch the statistics from the + persistent storage */ + DICT_STATS_FETCH_ONLY_IF_NOT_IN_MEMORY /* only fetch the stats + from the persistent storage if the in-memory + structures have not been initialized yet, + otherwise do nothing */ +}; + +typedef enum dict_stats_upd_option dict_stats_upd_option_t; + +/*********************************************************************//** +Calculates new estimates for table and index statistics. The statistics +are used in query optimization. +@return DB_* error code or DB_SUCCESS */ +UNIV_INTERN +enum db_err +dict_stats_update( +/*==============*/ + dict_table_t* table, /*!< in/out: table */ + dict_stats_upd_option_t stats_upd_option, + /*!< in: whether to (re) calc + the stats or to fetch them from + the persistent storage */ + ibool caller_has_dict_sys_mutex); + /*!< in: TRUE if the caller + owns dict_sys->mutex */ + +/*********************************************************************//** +Removes the information for a particular index's stats from the persistent +storage if it exists and if there is data stored for this index. 
+The transaction is not committed, it must not be committed in this +function because this is the user trx that is running DROP INDEX. +The transaction will be committed at the very end when dropping an +index. +@return DB_SUCCESS or error code */ +UNIV_INTERN +enum db_err +dict_stats_delete_index_stats( +/*==========================*/ + dict_index_t* index, /*!< in: index */ + trx_t* trx, /*!< in: transaction to use */ + char* errstr, /*!< out: error message if != DB_SUCCESS + is returned */ + ulint errstr_sz);/*!< in: size of the errstr buffer */ + +/*********************************************************************//** +Removes the statistics for a table and all of its indexes from the +persistent storage if it exists and if there is data stored for the table. +This function creates its own transaction and commits it. +@return DB_SUCCESS or error code */ +UNIV_INTERN +enum db_err +dict_stats_delete_table_stats( +/*==========================*/ + const char* table_name, /*!< in: table name */ + char* errstr, /*!< out: error message + if != DB_SUCCESS is returned */ + ulint errstr_sz); /*!< in: size of errstr buffer */ + +#endif /* dict0stats_h */ diff --git a/storage/innobase/include/dict0types.h b/storage/innobase/include/dict0types.h index f0a05a38070..cd2863582c1 100644 --- a/storage/innobase/include/dict0types.h +++ b/storage/innobase/include/dict0types.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -40,6 +40,10 @@ typedef struct tab_node_struct tab_node_t; #define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */ #define DICT_HDR_PAGE_NO FSP_DICT_HDR_PAGE_NO +/* The ibuf table and indexes's ID are assigned as the number +DICT_IBUF_ID_MIN plus the space id */ +#define DICT_IBUF_ID_MIN 0xFFFFFFFF00000000ULL + typedef ib_id_t table_id_t; typedef ib_id_t index_id_t; @@ -49,11 +53,11 @@ be responsible to deal with corrupted table or index. Note: please define the IGNORE_ERR_* as bits, so their value can be or-ed together */ enum dict_err_ignore { - DICT_ERR_IGNORE_NONE = 0, /*!< no error to ignore */ - DICT_ERR_IGNORE_INDEX_ROOT = 1, /*!< ignore error if index root + DICT_ERR_IGNORE_NONE = 0, /*!< no error to ignore */ + DICT_ERR_IGNORE_INDEX_ROOT = 1, /*!< ignore error if index root page is FIL_NULL or incorrect value */ DICT_ERR_IGNORE_CORRUPT = 2, /*!< skip corrupted indexes */ - DICT_ERR_IGNORE_ALL = 0xFFFF /*!< ignore all errors */ + DICT_ERR_IGNORE_ALL = 0xFFFF /*!< ignore all errors */ }; typedef enum dict_err_ignore dict_err_ignore_t; diff --git a/storage/innobase/include/dyn0dyn.h b/storage/innobase/include/dyn0dyn.h index 121a5946ac7..5e69cb13122 100644 --- a/storage/innobase/include/dyn0dyn.h +++ b/storage/innobase/include/dyn0dyn.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/dyn0dyn.ic b/storage/innobase/include/dyn0dyn.ic index 110e674abff..b86697d6865 100644 --- a/storage/innobase/include/dyn0dyn.ic +++ b/storage/innobase/include/dyn0dyn.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/eval0eval.h b/storage/innobase/include/eval0eval.h index 60aefd8d453..e3b1e6c16b6 100644 --- a/storage/innobase/include/eval0eval.h +++ b/storage/innobase/include/eval0eval.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/eval0eval.ic b/storage/innobase/include/eval0eval.ic index fe767f39b00..e4b1dd08017 100644 --- a/storage/innobase/include/eval0eval.ic +++ b/storage/innobase/include/eval0eval.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. 
+Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -70,7 +70,7 @@ eval_node_ensure_val_buf( dfield = que_node_get_val(node); dfield_set_len(dfield, size); - data = dfield_get_data(dfield); + data = static_cast<byte*>(dfield_get_data(dfield)); if (!data || que_node_get_val_buf_size(node) < size) { @@ -110,12 +110,12 @@ eval_exp( { if (que_node_get_type(exp_node) == QUE_NODE_SYMBOL) { - eval_sym((sym_node_t*)exp_node); + eval_sym((sym_node_t*) exp_node); return; } - eval_func(exp_node); + eval_func(static_cast<func_node_t*>(exp_node)); } /*****************************************************************//** @@ -132,7 +132,7 @@ eval_node_set_int_val( dfield = que_node_get_val(node); - data = dfield_get_data(dfield); + data = static_cast<byte*>(dfield_get_data(dfield)); if (data == NULL) { data = eval_node_alloc_val_buf(node, 4); @@ -140,7 +140,7 @@ eval_node_set_int_val( ut_ad(dfield_get_len(dfield) == 4); - mach_write_to_4(data, (ulint)val); + mach_write_to_4(data, (ulint) val); } /*****************************************************************//** @@ -152,13 +152,15 @@ eval_node_get_int_val( /*==================*/ que_node_t* node) /*!< in: expression node */ { + const byte* ptr; dfield_t* dfield; dfield = 
que_node_get_val(node); + ptr = static_cast<byte*>(dfield_get_data(dfield)); ut_ad(dfield_get_len(dfield) == 4); - return((int)mach_read_from_4(dfield_get_data(dfield))); + return((int) mach_read_from_4(ptr)); } /*****************************************************************//** @@ -175,7 +177,7 @@ eval_node_get_ibool_val( dfield = que_node_get_val(node); - data = dfield_get_data(dfield); + data = static_cast<byte*>(dfield_get_data(dfield)); ut_ad(data != NULL); @@ -196,7 +198,7 @@ eval_node_set_ibool_val( dfield = que_node_get_val(func_node); - data = dfield_get_data(dfield); + data = static_cast<byte*>(dfield_get_data(dfield)); if (data == NULL) { /* Allocate 1 byte to hold the value */ @@ -246,6 +248,8 @@ eval_node_copy_val( dfield2 = que_node_get_val(node2); - eval_node_copy_and_alloc_val(node1, dfield_get_data(dfield2), - dfield_get_len(dfield2)); + eval_node_copy_and_alloc_val( + node1, + static_cast<byte*>(dfield_get_data(dfield2)), + dfield_get_len(dfield2)); } diff --git a/storage/innobase/include/eval0proc.h b/storage/innobase/include/eval0proc.h index 13e2e365320..7755fb10343 100644 --- a/storage/innobase/include/eval0proc.h +++ b/storage/innobase/include/eval0proc.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1998, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1998, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/eval0proc.ic b/storage/innobase/include/eval0proc.ic index c602af0a694..81418bae2c9 100644 --- a/storage/innobase/include/eval0proc.ic +++ b/storage/innobase/include/eval0proc.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1998, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1998, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -40,7 +40,7 @@ proc_step( ut_ad(thr); - node = thr->run_node; + node = static_cast<proc_node_t*>(thr->run_node); ut_ad(que_node_get_type(node) == QUE_NODE_PROC); if (thr->prev_node == que_node_get_parent(node)) { @@ -75,7 +75,7 @@ proc_eval_step( ut_ad(thr); - node = thr->run_node; + node = static_cast<func_node_t*>(thr->run_node); ut_ad(que_node_get_type(node) == QUE_NODE_FUNC); /* Evaluate the procedure */ diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 610bd4b0e5c..fa632ea3c6b 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -27,12 +27,16 @@ Created 10/25/1995 Heikki Tuuri #define fil0fil_h #include "univ.i" + +#ifndef UNIV_INNOCHECKSUM + #include "dict0types.h" #include "ut0byte.h" #include "os0file.h" #ifndef UNIV_HOTBACKUP #include "sync0rw.h" #include "ibuf0types.h" +#include "log0log.h" #endif /* !UNIV_HOTBACKUP */ /** When mysqld is run, the default directory "." is the mysqld datadir, @@ -70,6 +74,8 @@ struct fil_addr_struct{ /** The null file address */ extern fil_addr_t fil_addr_null; +#endif /* !UNIV_INNOCHECKSUM */ + /** The byte offsets on a file page for various variables @{ */ #define FIL_PAGE_SPACE_OR_CHKSUM 0 /*!< in < MySQL-4.0.14 space id the page belongs to (== 0) but in later @@ -127,6 +133,8 @@ extern fil_addr_t fil_addr_null; #define FIL_PAGE_DATA_END 8 /*!< size of the page trailer */ /* @} */ +#ifndef UNIV_INNOCHECKSUM + /** File page types (values of FIL_PAGE_TYPE) @{ */ #define FIL_PAGE_INDEX 17855 /*!< B-tree node */ #define FIL_PAGE_UNDO_LOG 2 /*!< Undo log page */ @@ -142,6 +150,8 @@ extern fil_addr_t fil_addr_null; #define FIL_PAGE_TYPE_BLOB 10 /*!< Uncompressed BLOB page */ #define FIL_PAGE_TYPE_ZBLOB 11 /*!< First compressed BLOB page */ #define FIL_PAGE_TYPE_ZBLOB2 12 /*!< Subsequent compressed BLOB page */ +#define FIL_PAGE_TYPE_LAST FIL_PAGE_TYPE_ZBLOB2 + /*!< Last page type */ /* @} */ /** Space types @{ */ @@ -157,6 +167,8 @@ extern ulint fil_n_pending_log_flushes; /** Number of pending tablespace flushes */ extern ulint fil_n_pending_tablespace_flushes; +/** Number of files currently open */ +extern ulint fil_n_file_opened; #ifndef UNIV_HOTBACKUP 
/*******************************************************************//** @@ -213,8 +225,8 @@ fil_space_truncate_start( some initial files in the space */ #endif /* UNIV_LOG_ARCHIVE */ /*******************************************************************//** -Creates a space memory object and puts it to the 'fil system' hash table. If -there is an error, prints an error message to the .err log. +Creates a space memory object and puts it to the 'fil system' hash table. +If there is an error, prints an error message to the .err log. @return TRUE if success */ UNIV_INTERN ibool @@ -320,12 +332,11 @@ UNIV_INTERN ulint fil_write_flushed_lsn_to_data_files( /*================================*/ - ib_uint64_t lsn, /*!< in: lsn to write */ - ulint arch_log_no); /*!< in: latest archived log - file number */ + lsn_t lsn, /*!< in: lsn to write */ + ulint arch_log_no); /*!< in: latest archived log file number */ /*******************************************************************//** -Reads the flushed lsn and arch no fields from a data file at database -startup. */ +Reads the flushed lsn, arch no, and tablespace flag fields from a data +file at database startup. */ UNIV_INTERN void fil_read_first_page( @@ -341,24 +352,25 @@ fil_read_first_page( ulint* max_arch_log_no, /*!< out: max of archived log numbers in data files */ #endif /* UNIV_LOG_ARCHIVE */ - ib_uint64_t* min_flushed_lsn, /*!< out: min of flushed + lsn_t* min_flushed_lsn, /*!< out: min of flushed lsn values in data files */ - ib_uint64_t* max_flushed_lsn); /*!< out: max of flushed + lsn_t* max_flushed_lsn); /*!< out: max of flushed lsn values in data files */ /*******************************************************************//** -Increments the count of pending operation, if space is not being deleted. -@return TRUE if being deleted, and operation should be skipped */ +Increments the count of pending insert buffer page merges, if space is not +being deleted. 
+@return TRUE if being deleted, and ibuf merges should be skipped */ UNIV_INTERN ibool -fil_inc_pending_ops( -/*================*/ +fil_inc_pending_ibuf_merges( +/*========================*/ ulint id); /*!< in: space id */ /*******************************************************************//** -Decrements the count of pending operations. */ +Decrements the count of pending insert buffer page merges. */ UNIV_INTERN void -fil_decr_pending_ops( -/*=================*/ +fil_decr_pending_ibuf_merges( +/*=========================*/ ulint id); /*!< in: space id */ #endif /* !UNIV_HOTBACKUP */ /*******************************************************************//** @@ -397,9 +409,7 @@ UNIV_INTERN ibool fil_delete_tablespace( /*==================*/ - ulint id, /*!< in: space id */ - ibool evict_all); /*!< in: TRUE if we want all pages - evicted from LRU. */ + ulint id); /*!< in: space id */ #ifndef UNIV_HOTBACKUP /*******************************************************************//** Discards a single-table tablespace. 
The tablespace must be cached in the @@ -424,7 +434,7 @@ UNIV_INTERN ibool fil_rename_tablespace( /*==================*/ - const char* old_name, /*!< in: old table name in the standard + const char* old_name_in, /*!< in: old table name in the standard databasename/tablename format of InnoDB, or NULL if we do the rename based on the space id only */ @@ -452,6 +462,7 @@ fil_create_new_single_table_tablespace( ibool is_temp, /*!< in: TRUE if a table created with CREATE TEMPORARY TABLE */ ulint flags, /*!< in: tablespace flags */ + ulint flags2, /*!< in: table flags2 */ ulint size); /*!< in: the initial size of the tablespace file in pages, must be >= FIL_IBD_FILE_INITIAL_SIZE */ @@ -497,7 +508,7 @@ fil_reset_too_high_lsns( /*====================*/ const char* name, /*!< in: table name in the databasename/tablename format */ - ib_uint64_t current_lsn); /*!< in: reset lsn's if the lsn stamped + lsn_t current_lsn); /*!< in: reset lsn's if the lsn stamped to FIL_PAGE_FILE_FLUSH_LSN in the first page is too high */ #endif /* !UNIV_HOTBACKUP */ @@ -516,7 +527,7 @@ fil_load_single_table_tablespaces(void); /*******************************************************************//** Returns TRUE if a single-table tablespace does not exist in the memory cache, or is being deleted there. 
-@return TRUE if does not exist or is being\ deleted */ +@return TRUE if does not exist or is being deleted */ UNIV_INTERN ibool fil_tablespace_deleted_or_being_deleted_in_mem( @@ -545,10 +556,7 @@ fil_space_for_table_exists_in_mem( /*==============================*/ ulint id, /*!< in: space id */ const char* name, /*!< in: table name in the standard - 'databasename/tablename' format or - the dir path to a temp table */ - ibool is_temp, /*!< in: TRUE if created with CREATE - TEMPORARY TABLE */ + 'databasename/tablename' format */ ibool mark_space, /*!< in: in crash recovery, at database startup we mark all spaces which have an associated table in the InnoDB @@ -649,7 +657,7 @@ fil_io( /**********************************************************************//** Waits for an aio operation to complete. This function is used to write the handler for completed requests. The aio array of pending requests is divided -into segments (see os0file.c for more info). The thread specifies which +into segments (see os0file.cc for more info). The thread specifies which segment it wants to wait for. */ UNIV_INTERN void @@ -734,4 +742,6 @@ fil_tablespace_is_being_deleted( typedef struct fil_space_struct fil_space_t; +#endif /* !UNIV_INNOCHECKSUM */ + #endif diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h index f07e3decc66..994783c2db9 100644 --- a/storage/innobase/include/fsp0fsp.h +++ b/storage/innobase/include/fsp0fsp.h @@ -28,26 +28,93 @@ Created 12/18/1995 Heikki Tuuri #include "univ.i" +#ifndef UNIV_INNOCHECKSUM + #include "mtr0mtr.h" #include "fut0lst.h" #include "ut0byte.h" #include "page0types.h" #include "fsp0types.h" +#endif /* !UNIV_INNOCHECKSUM */ + /* @defgroup fsp_flags InnoDB Tablespace Flag Constants @{ */ +/** Width of the POST_ANTELOPE flag */ +#define FSP_FLAGS_WIDTH_POST_ANTELOPE 1 +/** Number of flag bits used to indicate the tablespace zip page size */ +#define FSP_FLAGS_WIDTH_ZIP_SSIZE 4 +/** Width of the ATOMIC_BLOBS flag. 
The ability to break up a long +column into an in-record prefix and an externally stored part is available +to the two Barracuda row formats COMPRESSED and DYNAMIC. */ +#define FSP_FLAGS_WIDTH_ATOMIC_BLOBS 1 /** Number of flag bits used to indicate the tablespace page size */ #define FSP_FLAGS_WIDTH_PAGE_SSIZE 4 +/** Width of all the currently known tablespace flags */ +#define FSP_FLAGS_WIDTH (FSP_FLAGS_WIDTH_POST_ANTELOPE \ + + FSP_FLAGS_WIDTH_ZIP_SSIZE \ + + FSP_FLAGS_WIDTH_ATOMIC_BLOBS \ + + FSP_FLAGS_WIDTH_PAGE_SSIZE) + +/** A mask of all the known/used bits in tablespace flags */ +#define FSP_FLAGS_MASK (~(~0 << FSP_FLAGS_WIDTH)) + +/** Zero relative shift position of the POST_ANTELOPE field */ +#define FSP_FLAGS_POS_POST_ANTELOPE 0 +/** Zero relative shift position of the ZIP_SSIZE field */ +#define FSP_FLAGS_POS_ZIP_SSIZE (FSP_FLAGS_POS_POST_ANTELOPE \ + + FSP_FLAGS_WIDTH_POST_ANTELOPE) +/** Zero relative shift position of the ATOMIC_BLOBS field */ +#define FSP_FLAGS_POS_ATOMIC_BLOBS (FSP_FLAGS_POS_ZIP_SSIZE \ + + FSP_FLAGS_WIDTH_ZIP_SSIZE) /** Zero relative shift position of the PAGE_SSIZE field */ -#define FSP_FLAGS_POS_PAGE_SSIZE 6 +#define FSP_FLAGS_POS_PAGE_SSIZE (FSP_FLAGS_POS_ATOMIC_BLOBS \ + + FSP_FLAGS_WIDTH_ATOMIC_BLOBS) +/** Zero relative shift position of the start of the UNUSED bits */ +#define FSP_FLAGS_POS_UNUSED (FSP_FLAGS_POS_PAGE_SSIZE \ + + FSP_FLAGS_WIDTH_PAGE_SSIZE) + +/** Bit mask of the POST_ANTELOPE field */ +#define FSP_FLAGS_MASK_POST_ANTELOPE \ + ((~(~0 << FSP_FLAGS_WIDTH_POST_ANTELOPE)) \ + << FSP_FLAGS_POS_POST_ANTELOPE) +/** Bit mask of the ZIP_SSIZE field */ +#define FSP_FLAGS_MASK_ZIP_SSIZE \ + ((~(~0 << FSP_FLAGS_WIDTH_ZIP_SSIZE)) \ + << FSP_FLAGS_POS_ZIP_SSIZE) +/** Bit mask of the ATOMIC_BLOBS field */ +#define FSP_FLAGS_MASK_ATOMIC_BLOBS \ + ((~(~0 << FSP_FLAGS_WIDTH_ATOMIC_BLOBS)) \ + << FSP_FLAGS_POS_ATOMIC_BLOBS) /** Bit mask of the PAGE_SSIZE field */ #define FSP_FLAGS_MASK_PAGE_SSIZE \ ((~(~0 << 
FSP_FLAGS_WIDTH_PAGE_SSIZE)) \ << FSP_FLAGS_POS_PAGE_SSIZE) + +/** Return the value of the POST_ANTELOPE field */ +#define FSP_FLAGS_GET_POST_ANTELOPE(flags) \ + ((flags & FSP_FLAGS_MASK_POST_ANTELOPE) \ + >> FSP_FLAGS_POS_POST_ANTELOPE) +/** Return the value of the ZIP_SSIZE field */ +#define FSP_FLAGS_GET_ZIP_SSIZE(flags) \ + ((flags & FSP_FLAGS_MASK_ZIP_SSIZE) \ + >> FSP_FLAGS_POS_ZIP_SSIZE) +/** Return the value of the ATOMIC_BLOBS field */ +#define FSP_FLAGS_HAS_ATOMIC_BLOBS(flags) \ + ((flags & FSP_FLAGS_MASK_ATOMIC_BLOBS) \ + >> FSP_FLAGS_POS_ATOMIC_BLOBS) /** Return the value of the PAGE_SSIZE field */ #define FSP_FLAGS_GET_PAGE_SSIZE(flags) \ ((flags & FSP_FLAGS_MASK_PAGE_SSIZE) \ >> FSP_FLAGS_POS_PAGE_SSIZE) +/** Return the contents of the UNUSED bits */ +#define FSP_FLAGS_GET_UNUSED(flags) \ + (flags >> FSP_FLAGS_POS_UNUSED) + +/** Set a PAGE_SSIZE into the correct bits in a given +tablespace flags. */ +#define FSP_FLAGS_SET_PAGE_SSIZE(flags, ssize) \ + (flags | (ssize << FSP_FLAGS_POS_PAGE_SSIZE)) /* @} */ @@ -116,6 +183,142 @@ descriptor page, but used only in the first. */ FSP_FREE_LIMIT at a time */ /* @} */ +#ifndef UNIV_INNOCHECKSUM + +/* @defgroup File Segment Inode Constants (moved from fsp0fsp.c) @{ */ + +/* FILE SEGMENT INODE + ================== + +Segment inode which is created for each segment in a tablespace. NOTE: in +purge we assume that a segment having only one currently used page can be +freed in a few steps, so that the freeing cannot fill the file buffer with +bufferfixed file pages. 
*/ + +typedef byte fseg_inode_t; + +#define FSEG_INODE_PAGE_NODE FSEG_PAGE_DATA + /* the list node for linking + segment inode pages */ + +#define FSEG_ARR_OFFSET (FSEG_PAGE_DATA + FLST_NODE_SIZE) +/*-------------------------------------*/ +#define FSEG_ID 0 /* 8 bytes of segment id: if this is 0, + it means that the header is unused */ +#define FSEG_NOT_FULL_N_USED 8 + /* number of used segment pages in + the FSEG_NOT_FULL list */ +#define FSEG_FREE 12 + /* list of free extents of this + segment */ +#define FSEG_NOT_FULL (12 + FLST_BASE_NODE_SIZE) + /* list of partially free extents */ +#define FSEG_FULL (12 + 2 * FLST_BASE_NODE_SIZE) + /* list of full extents */ +#define FSEG_MAGIC_N (12 + 3 * FLST_BASE_NODE_SIZE) + /* magic number used in debugging */ +#define FSEG_FRAG_ARR (16 + 3 * FLST_BASE_NODE_SIZE) + /* array of individual pages + belonging to this segment in fsp + fragment extent lists */ +#define FSEG_FRAG_ARR_N_SLOTS (FSP_EXTENT_SIZE / 2) + /* number of slots in the array for + the fragment pages */ +#define FSEG_FRAG_SLOT_SIZE 4 /* a fragment page slot contains its + page number within space, FIL_NULL + means that the slot is not in use */ +/*-------------------------------------*/ +#define FSEG_INODE_SIZE \ + (16 + 3 * FLST_BASE_NODE_SIZE \ + + FSEG_FRAG_ARR_N_SLOTS * FSEG_FRAG_SLOT_SIZE) + +#define FSP_SEG_INODES_PER_PAGE(zip_size) \ + (((zip_size ? zip_size : UNIV_PAGE_SIZE) \ + - FSEG_ARR_OFFSET - 10) / FSEG_INODE_SIZE) + /* Number of segment inodes which fit on a + single page */ + +#define FSEG_MAGIC_N_VALUE 97937874 + +#define FSEG_FILLFACTOR 8 /* If this value is x, then if + the number of unused but reserved + pages in a segment is less than + reserved pages * 1/x, and there are + at least FSEG_FRAG_LIMIT used pages, + then we allow a new empty extent to + be added to the segment in + fseg_alloc_free_page. Otherwise, we + use unused pages of the segment. 
*/ + +#define FSEG_FRAG_LIMIT FSEG_FRAG_ARR_N_SLOTS + /* If the segment has >= this many + used pages, it may be expanded by + allocating extents to the segment; + until that only individual fragment + pages are allocated from the space */ + +#define FSEG_FREE_LIST_LIMIT 40 /* If the reserved size of a segment + is at least this many extents, we + allow extents to be put to the free + list of the extent: at most + FSEG_FREE_LIST_MAX_LEN many */ +#define FSEG_FREE_LIST_MAX_LEN 4 +/* @} */ + +/* @defgroup Extent Descriptor Constants (moved from fsp0fsp.c) @{ */ + +/* EXTENT DESCRIPTOR + ================= + +File extent descriptor data structure: contains bits to tell which pages in +the extent are free and which contain old tuple version to clean. */ + +/*-------------------------------------*/ +#define XDES_ID 0 /* The identifier of the segment + to which this extent belongs */ +#define XDES_FLST_NODE 8 /* The list node data structure + for the descriptors */ +#define XDES_STATE (FLST_NODE_SIZE + 8) + /* contains state information + of the extent */ +#define XDES_BITMAP (FLST_NODE_SIZE + 12) + /* Descriptor bitmap of the pages + in the extent */ +/*-------------------------------------*/ + +#define XDES_BITS_PER_PAGE 2 /* How many bits are there per page */ +#define XDES_FREE_BIT 0 /* Index of the bit which tells if + the page is free */ +#define XDES_CLEAN_BIT 1 /* NOTE: currently not used! + Index of the bit which tells if + there are old versions of tuples + on the page */ +/* States of a descriptor */ +#define XDES_FREE 1 /* extent is in free list of space */ +#define XDES_FREE_FRAG 2 /* extent is in free fragment list of + space */ +#define XDES_FULL_FRAG 3 /* extent is in full fragment list of + space */ +#define XDES_FSEG 4 /* extent belongs to a segment */ + +/** File extent data structure size in bytes. 
*/ +#define XDES_SIZE \ + (XDES_BITMAP \ + + UT_BITS_IN_BYTES(FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE)) + +/** File extent data structure size in bytes for MAX page size. */ +#define XDES_SIZE_MAX \ + (XDES_BITMAP \ + + UT_BITS_IN_BYTES(FSP_EXTENT_SIZE_MAX * XDES_BITS_PER_PAGE)) + +/** File extent data structure size in bytes for MIN page size. */ +#define XDES_SIZE_MIN \ + (XDES_BITMAP \ + + UT_BITS_IN_BYTES(FSP_EXTENT_SIZE_MIN * XDES_BITS_PER_PAGE)) + +/** Offset of the descriptor array on a descriptor page */ +#define XDES_ARR_OFFSET (FSP_HEADER_OFFSET + FSP_HEADER_SIZE) + /* @} */ /**********************************************************************//** @@ -125,16 +328,6 @@ void fsp_init(void); /*==========*/ /**********************************************************************//** -Gets the current free limit of the system tablespace. The free limit -means the place of the first page which has never been put to the -free list for allocation. The space above that address is initialized -to zero. Sets also the global variable log_fsp_current_free_limit. -@return free limit in megabytes */ -UNIV_INTERN -ulint -fsp_header_get_free_limit(void); -/*===========================*/ -/**********************************************************************//** Gets the size of the system tablespace from the tablespace header. If we do not have an auto-extending data file, this should be equal to the size of the data files. If there is an auto-extending data file, @@ -177,9 +370,9 @@ fsp_header_get_zip_size( /*====================*/ const page_t* page); /*!< in: first page of a tablespace */ /**********************************************************************//** -Writes the space id and compressed page size to a tablespace header. -This function is used past the buffer pool when we in fil0fil.c create -a new single-table tablespace. */ +Writes the space id and flags to a tablespace header. 
The flags contain +row type, physical/compressed page size, and logical/uncompressed page +size of the tablespace. */ UNIV_INTERN void fsp_header_init_fields( @@ -197,16 +390,16 @@ fsp_header_init( /*============*/ ulint space, /*!< in: space id */ ulint size, /*!< in: current size in blocks */ - mtr_t* mtr); /*!< in: mini-transaction handle */ + mtr_t* mtr); /*!< in/out: mini-transaction */ /**********************************************************************//** Increases the space size field of a space. */ UNIV_INTERN void fsp_header_inc_size( /*================*/ - ulint space, /*!< in: space id */ - ulint size_inc,/*!< in: size increment in pages */ - mtr_t* mtr); /*!< in: mini-transaction handle */ + ulint space, /*!< in: space id */ + ulint size_inc, /*!< in: size increment in pages */ + mtr_t* mtr); /*!< in/out: mini-transaction */ /**********************************************************************//** Creates a new segment. @return the block where the segment header is placed, x-latched, NULL @@ -222,7 +415,7 @@ fseg_create( will belong to the created segment */ ulint byte_offset, /*!< in: byte offset of the created segment header on the page */ - mtr_t* mtr); /*!< in: mtr */ + mtr_t* mtr); /*!< in/out: mini-transaction */ /**********************************************************************//** Creates a new segment. @return the block where the segment header is placed, x-latched, NULL @@ -244,7 +437,7 @@ fseg_create_general( the inode and the other for the segment) then there is no need to do the check for this individual operation */ - mtr_t* mtr); /*!< in: mtr */ + mtr_t* mtr); /*!< in/out: mini-transaction */ /**********************************************************************//** Calculates the number of pages reserved by a segment, and how many pages are currently used. 
@@ -255,7 +448,7 @@ fseg_n_reserved_pages( /*==================*/ fseg_header_t* header, /*!< in: segment header */ ulint* used, /*!< out: number of pages used (<= reserved) */ - mtr_t* mtr); /*!< in: mtr handle */ + mtr_t* mtr); /*!< in/out: mini-transaction */ /**********************************************************************//** Allocates a single free page from a segment. This function implements the intelligent allocation strategy which tries to minimize @@ -339,7 +532,7 @@ fsp_reserve_free_extents( ulint space, /*!< in: space id */ ulint n_ext, /*!< in: number of extents to reserve */ ulint alloc_type,/*!< in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */ - mtr_t* mtr); /*!< in: mtr */ + mtr_t* mtr); /*!< in: mini-transaction */ /**********************************************************************//** This function should be used to get information on how much we still will be able to insert new data to the database without running out the @@ -360,7 +553,7 @@ fseg_free_page( fseg_header_t* seg_header, /*!< in: segment header */ ulint space, /*!< in: space id */ ulint page, /*!< in: page offset */ - mtr_t* mtr); /*!< in: mtr handle */ + mtr_t* mtr); /*!< in/out: mini-transaction */ /**********************************************************************//** Frees part of a segment. This function can be used to free a segment by repeatedly calling this function in different mini-transactions. @@ -375,7 +568,7 @@ fseg_free_step( resides on the first page of the frag list of the segment, this pointer becomes obsolete after the last freeing step */ - mtr_t* mtr); /*!< in: mtr */ + mtr_t* mtr); /*!< in/out: mini-transaction */ /**********************************************************************//** Frees part of a segment. Differs from fseg_free_step because this function leaves the header page unfreed. 
@@ -386,7 +579,7 @@ fseg_free_step_not_header( /*======================*/ fseg_header_t* header, /*!< in: segment header which must reside on the first fragment page of the segment */ - mtr_t* mtr); /*!< in: mtr */ + mtr_t* mtr); /*!< in/out: mini-transaction */ /***********************************************************************//** Checks if a page address is an extent descriptor page address. @return TRUE if a descriptor page */ @@ -431,7 +624,7 @@ ibool fseg_validate( /*==========*/ fseg_header_t* header, /*!< in: segment header */ - mtr_t* mtr); /*!< in: mtr */ + mtr_t* mtr); /*!< in/out: mini-transaction */ #endif /* UNIV_DEBUG */ #ifdef UNIV_BTR_PRINT /*******************************************************************//** @@ -441,14 +634,44 @@ void fseg_print( /*=======*/ fseg_header_t* header, /*!< in: segment header */ - mtr_t* mtr); /*!< in: mtr */ + mtr_t* mtr); /*!< in/out: mini-transaction */ #endif /* UNIV_BTR_PRINT */ /********************************************************************//** +Validate and return the tablespace flags, which are stored in the +tablespace header at offset FSP_SPACE_FLAGS. They should be 0 for +ROW_FORMAT=COMPACT and ROW_FORMAT=REDUNDANT. The newer row formats, +COMPRESSED and DYNAMIC, use a file format > Antelope so they should +have a file format number plus the DICT_TF_COMPACT bit set. +@return ulint containing the validated tablespace flags. */ +UNIV_INLINE +ulint +fsp_flags_validate( +/*===============*/ + ulint flags); /*!< in: tablespace flags */ +/********************************************************************//** +Determine if the tablespace is compressed from dict_table_t::flags. 
+@return TRUE if compressed, FALSE if not compressed */ +UNIV_INLINE +ibool +fsp_flags_is_compressed( +/*====================*/ + ulint flags); /*!< in: tablespace flags */ + +#endif /* !UNIV_INNOCHECKSUM */ + +/********************************************************************//** +Extract the zip size from tablespace flags. A tablespace has only one +physical page size whether that page is compressed or not. +@return compressed page size of the file-per-table tablespace in bytes, +or zero if the table is not compressed. */ +UNIV_INLINE +ulint +fsp_flags_get_zip_size( +/*====================*/ + ulint flags); /*!< in: tablespace flags */ +/********************************************************************//** Extract the page size from tablespace flags. -This feature, storing the page_ssize into the tablespace flags, is added -to InnoDB 5.6.4. This is here only to protect against a crash if a newer -database is opened with this code branch. @return page size of the tablespace in bytes */ UNIV_INLINE ulint @@ -456,6 +679,15 @@ fsp_flags_get_page_size( /*====================*/ ulint flags); /*!< in: tablespace flags */ +/********************************************************************//** +Set page size */ +UNIV_INLINE +ulint +fsp_flags_set_page_size( +/*====================*/ + ulint flags, /*!< in: tablespace flags */ + ulint page_size); /*!< in: page size in bytes */ + #ifndef UNIV_NONINL #include "fsp0fsp.ic" #endif diff --git a/storage/innobase/include/fsp0fsp.ic b/storage/innobase/include/fsp0fsp.ic index c92111a9d89..498f9000888 100644 --- a/storage/innobase/include/fsp0fsp.ic +++ b/storage/innobase/include/fsp0fsp.ic @@ -23,6 +23,8 @@ File space management Created 12/18/1995 Heikki Tuuri *******************************************************/ +#ifndef UNIV_INNOCHECKSUM + /***********************************************************************//** Checks if a page address is an extent descriptor page address. 
@return TRUE if a descriptor page */ @@ -37,17 +39,111 @@ fsp_descr_page( ut_ad(ut_is_2pow(zip_size)); if (!zip_size) { - return(UNIV_UNLIKELY((page_no & (UNIV_PAGE_SIZE - 1)) - == FSP_XDES_OFFSET)); + return((page_no & (UNIV_PAGE_SIZE - 1)) == FSP_XDES_OFFSET); } - return(UNIV_UNLIKELY((page_no & (zip_size - 1)) == FSP_XDES_OFFSET)); + return((page_no & (zip_size - 1)) == FSP_XDES_OFFSET); } + +/********************************************************************//** +Validate and return the tablespace flags, which are stored in the +tablespace header at offset FSP_SPACE_FLAGS. They should be 0 for +ROW_FORMAT=COMPACT and ROW_FORMAT=REDUNDANT. The newer row formats, +COMPRESSED and DYNAMIC, use a file format > Antelope so they should +have a file format number plus the DICT_TF_COMPACT bit set. +@return Same as input after validating it as FSP_SPACE_FLAGS. +If there is an error, trigger assertion failure. */ +UNIV_INLINE +ulint +fsp_flags_validate( +/*===============*/ + ulint flags) /*!< in: tablespace flags */ +{ + ulint post_antelope = FSP_FLAGS_GET_POST_ANTELOPE(flags); + ulint zip_ssize = FSP_FLAGS_GET_ZIP_SSIZE(flags); + ulint atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(flags); + ulint page_ssize = FSP_FLAGS_GET_PAGE_SSIZE(flags); + ulint unused = FSP_FLAGS_GET_UNUSED(flags); + + /* Make sure there are no bits that we do not know about. */ + ut_a(unused == 0); + + /* fsp_flags is zero unless atomic_blobs is set. */ + ut_a(flags != 1); + if (post_antelope) { + /* The Antelope row formats REDUNDANT and COMPACT did + not use tablespace flags, so this flag and the entire + 4-byte field is zero for Antelope row formats. */ + ut_a(atomic_blobs); + } + + if (!atomic_blobs) { + /* Barracuda row formats COMPRESSED and DYNAMIC build on + the page structure introduced for the COMPACT row format + by allowing long fields to be broken into prefix and + externally stored parts. 
*/ + ut_a(!post_antelope); + ut_a(zip_ssize == 0); + } else { + ut_a(post_antelope); + + /* Validate the zip shift size is within allowed range. */ + ut_a(zip_ssize <= PAGE_ZIP_SSIZE_MAX); + } + + /* The page size field can be used for any row type, or it may + be zero for an original 16k page size. + Validate the page shift size is within allowed range. */ + ut_a(page_ssize <= UNIV_PAGE_SSIZE_MAX); + ut_a((UNIV_PAGE_SIZE == UNIV_PAGE_SIZE_ORIG) || (page_ssize)); + +#if UNIV_FORMAT_MAX != UNIV_FORMAT_B +# error "UNIV_FORMAT_MAX != UNIV_FORMAT_B, Add more validations." +#endif + + /* Return the flags sent in if we did not fail an assert. */ + return(flags); +} + +/********************************************************************//** +Determine from the tablespace flags whether the tablespace is compressed. +@return TRUE if compressed, FALSE if not compressed */ +UNIV_INLINE +ibool +fsp_flags_is_compressed( +/*====================*/ + ulint flags) /*!< in: tablespace flags */ +{ + return(FSP_FLAGS_GET_ZIP_SSIZE(flags) != 0); +} + +#endif /* !UNIV_INNOCHECKSUM */ + +/********************************************************************//** +Extract the zip size from tablespace flags. +@return compressed page size of the file-per-table tablespace in bytes, +or zero if the table is not compressed. */ +UNIV_INLINE +ulint +fsp_flags_get_zip_size( +/*===================*/ + ulint flags) /*!< in: tablespace flags */ +{ + ulint zip_size = 0; + ulint ssize = FSP_FLAGS_GET_ZIP_SSIZE(flags); + + /* Convert from a 'log2 minus 9' to a page size in bytes. */ + if (ssize) { + zip_size = ((UNIV_ZIP_SIZE_MIN >> 1) << ssize); + + ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX); + } + + return(zip_size); +} + /********************************************************************//** Extract the page size from tablespace flags. -This feature, storing the page_ssize into the tablespace flags, is added -to InnoDB 5.6.4. 
This is here only to protect against a crash if a newer -database is opened with this code branch. @return page size of the tablespace in bytes */ UNIV_INLINE ulint @@ -60,14 +156,61 @@ fsp_flags_get_page_size( /* Convert from a 'log2 minus 9' to a page size in bytes. */ if (UNIV_UNLIKELY(ssize)) { - page_size = (512 << ssize); + page_size = ((UNIV_ZIP_SIZE_MIN >> 1) << ssize); - ut_ad(page_size <= UNIV_PAGE_SIZE); + ut_ad(page_size <= UNIV_PAGE_SIZE_MAX); } else { /* If the page size was not stored, then it is the original 16k. */ - page_size = UNIV_PAGE_SIZE; + page_size = UNIV_PAGE_SIZE_ORIG; } return(page_size); } + +#ifndef UNIV_INNOCHECKSUM + +/********************************************************************//** +Add the page size to the tablespace flags. +@return tablespace flags after page size is added */ +UNIV_INLINE +ulint +fsp_flags_set_page_size( +/*====================*/ + ulint flags, /*!< in: tablespace flags */ + ulint page_size) /*!< in: page size in bytes */ +{ + ulint ssize = 0; + ulint shift; + + /* Page size must lie in [UNIV_PAGE_SIZE_MIN, UNIV_PAGE_SIZE_MAX] */ + ut_ad(page_size >= UNIV_PAGE_SIZE_MIN); + ut_ad(page_size <= UNIV_PAGE_SIZE_MAX); + + if (page_size == UNIV_PAGE_SIZE_ORIG) { + ut_ad(0 == FSP_FLAGS_GET_PAGE_SSIZE(flags)); + return(flags); + } + + for (shift = UNIV_PAGE_SIZE_SHIFT_MAX; + shift >= UNIV_PAGE_SIZE_SHIFT_MIN; + shift--) { + ulint mask = (1 << shift); + if (page_size & mask) { + ut_ad(!(page_size & ~mask)); + ssize = shift - UNIV_ZIP_SIZE_SHIFT_MIN + 1; + break; + } + } + + ut_ad(ssize); + ut_ad(ssize <= UNIV_PAGE_SSIZE_MAX); + + flags = FSP_FLAGS_SET_PAGE_SSIZE(flags, ssize); + + ut_ad(flags == fsp_flags_validate(flags)); + + return(flags); +} + +#endif /* !UNIV_INNOCHECKSUM */ diff --git a/storage/innobase/include/fsp0types.h b/storage/innobase/include/fsp0types.h index 496081c2346..94fd908ab0c 100644 --- a/storage/innobase/include/fsp0types.h +++ b/storage/innobase/include/fsp0types.h @@ -1,6 +1,6 @@ 
/***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -42,7 +42,13 @@ fseg_alloc_free_page) */ /* @} */ /** File space extent size (one megabyte) in pages */ -#define FSP_EXTENT_SIZE (1 << (20 - UNIV_PAGE_SIZE_SHIFT)) +#define FSP_EXTENT_SIZE (1048576U / UNIV_PAGE_SIZE) + +/** File space extent size (one megabyte) in pages for MAX page size */ +#define FSP_EXTENT_SIZE_MAX (1048576 / UNIV_PAGE_SIZE_MAX) + +/** File space extent size (one megabyte) in pages for MIN page size */ +#define FSP_EXTENT_SIZE_MIN (1048576 / UNIV_PAGE_SIZE_MIN) /** On a page of any file segment, data may be put starting from this offset */ diff --git a/storage/innobase/include/fts0ast.h b/storage/innobase/include/fts0ast.h new file mode 100644 index 00000000000..da40e2bbc96 --- /dev/null +++ b/storage/innobase/include/fts0ast.h @@ -0,0 +1,257 @@ +/***************************************************************************** + +Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/fts0ast.h +The FTS query parser (AST) abstract syntax tree routines + +Created 2007/03/16/03 Sunny Bains +*******************************************************/ + +#ifndef INNOBASE_FST0AST_H +#define INNOBASE_FST0AST_H + +#include "mem0mem.h" + +/* The type of AST Node */ +enum fts_ast_type_enum { + FTS_AST_OPER, /*!< Operator */ + FTS_AST_NUMB, /*!< Number */ + FTS_AST_TERM, /*!< Term (or word) */ + FTS_AST_TEXT, /*!< Text string */ + FTS_AST_LIST, /*!< Expression list */ + FTS_AST_SUBEXP_LIST /*!< Sub-Expression list */ +}; + +/* The FTS query operators that we support */ +enum fts_ast_oper_enum { + FTS_NONE, /*!< No operator */ + + FTS_IGNORE, /*!< Ignore rows that contain + this word */ + + FTS_EXIST, /*!< Include rows that contain + this word */ + + FTS_NEGATE, /*!< Include rows that contain + this word but rank them + lower*/ + + FTS_INCR_RATING, /*!< Increase the rank for this + word*/ + + FTS_DECR_RATING, /*!< Decrease the rank for this + word*/ + + FTS_DISTANCE /*!< Proximity distance */ +}; + +/* Enum types used by the FTS parser */ +typedef enum fts_ast_type_enum fts_ast_type_t; +typedef enum fts_ast_oper_enum fts_ast_oper_t; + +/* Data types used by 
the FTS parser */ +typedef struct fts_lexer_struct fts_lexer_t; +typedef struct fts_ast_text_struct fts_ast_text_t; +typedef struct fts_ast_term_struct fts_ast_term_t; +typedef struct fts_ast_node_struct fts_ast_node_t; +typedef struct fts_ast_list_struct fts_ast_list_t; +typedef struct fts_ast_state_struct fts_ast_state_t; + +typedef ulint (*fts_ast_callback)(fts_ast_oper_t, fts_ast_node_t*, void*); + +/******************************************************************** +Parse the string using the lexer setup within state.*/ +int +fts_parse( +/*======*/ + /* out: 0 on OK, 1 on error */ + fts_ast_state_t* state); /*!< in: ast state instance.*/ + +/******************************************************************** +Create an AST operator node */ +extern +fts_ast_node_t* +fts_ast_create_node_oper( +/*=====================*/ + void* arg, /*!< in: ast state */ + fts_ast_oper_t oper); /*!< in: ast operator */ +/******************************************************************** +Create an AST term node, makes a copy of ptr */ +extern +fts_ast_node_t* +fts_ast_create_node_term( +/*=====================*/ + void* arg, /*!< in: ast state */ + const char* ptr); /*!< in: term string */ +/******************************************************************** +Create an AST text node */ +extern +fts_ast_node_t* +fts_ast_create_node_text( +/*=====================*/ + void* arg, /*!< in: ast state */ + const char* ptr); /*!< in: text string */ +/******************************************************************** +Create an AST expr list node */ +extern +fts_ast_node_t* +fts_ast_create_node_list( +/*=====================*/ + void* arg, /*!< in: ast state */ + fts_ast_node_t* expr); /*!< in: ast expr */ +/******************************************************************** +Create a sub-expression list node. This function takes ownership of +expr and is responsible for deleting it. 
*/ +extern +fts_ast_node_t* +fts_ast_create_node_subexp_list( +/*============================*/ + /* out: new node */ + void* arg, /*!< in: ast state instance */ + fts_ast_node_t* expr); /*!< in: ast expr instance */ +/******************************************************************** +Set the wildcard attribute of a term.*/ +extern +void +fts_ast_term_set_wildcard( +/*======================*/ + fts_ast_node_t* node); /*!< in: term to change */ +/******************************************************************** +Set the proximity attribute of a text node. */ + +void +fts_ast_term_set_distance( +/*======================*/ + fts_ast_node_t* node, /*!< in/out: text node */ + ulint distance); /*!< in: the text proximity + distance */ +/********************************************************************//** +Free an fts_ast_node_t instance. +@return next node to free */ +UNIV_INTERN +fts_ast_node_t* +fts_ast_free_node( +/*==============*/ + fts_ast_node_t* node); /*!< in: node to free */ +/******************************************************************** +Add a sub-expression to an AST*/ +extern +fts_ast_node_t* +fts_ast_add_node( +/*=============*/ + fts_ast_node_t* list, /*!< in: list node instance */ + fts_ast_node_t* node); /*!< in: (sub) expr to add */ +/******************************************************************** +Print the AST node recursively.*/ +extern +void +fts_ast_node_print( +/*===============*/ + fts_ast_node_t* node); /*!< in: ast node to print */ +/******************************************************************** +For tracking node allocations, in case there is an error during parsing.*/ +extern +void +fts_ast_state_add_node( +/*===================*/ + fts_ast_state_t*state, /*!< in: ast state instance */ + fts_ast_node_t* node); /*!< in: node to add to state */ +/******************************************************************** +Free node and expr allocations.*/ +extern +void +fts_ast_state_free( +/*===============*/ + 
fts_ast_state_t*state); /*!< in: state instance + to free */ +/******************************************************************** +Traverse the AST.*/ +ulint +fts_ast_visit( +/*==========*/ + fts_ast_oper_t oper, /*!< in: FTS operator */ + fts_ast_node_t* node, /*!< in: instance to traverse*/ + fts_ast_callback visitor, /*!< in: callback */ + void* arg); /*!< in: callback arg */ +/******************************************************************** +Traverse the sub expression list.*/ +ulint +fts_ast_visit_sub_exp( +/*==========*/ + fts_ast_node_t* node, /*!< in: instance to traverse*/ + fts_ast_callback visitor, /*!< in: callback */ + void* arg); /*!< in: callback arg */ +/******************************************************************** +Create a lex instance.*/ +fts_lexer_t* +fts_lexer_create( +/*=============*/ + ibool boolean_mode, /*!< in: query type */ + const byte* query, /*!< in: query string */ + ulint query_len); /*!< in: query string len */ +/******************************************************************** +Free an fts_lexer_t instance.*/ +void +fts_lexer_free( +/*===========*/ + fts_lexer_t* fts_lexer); /*!< in: lexer instance to + free */ + +/* Query term type */ +struct fts_ast_term_struct { + byte* ptr; /*!< Pointer to term string.*/ + ibool wildcard; /*!< TRUE if wild card set.*/ +}; + +/* Query text type */ +struct fts_ast_text_struct { + byte* ptr; /*!< Pointer to term string.*/ + ulint distance; /*!< > 0 if proximity distance + set */ +}; + +/* The list of nodes in an expr list */ +struct fts_ast_list_struct { + fts_ast_node_t* head; /*!< Children list head */ + fts_ast_node_t* tail; /*!< Children list tail */ +}; + +/* FTS AST node to store the term, text, operator and sub-expressions.*/ +struct fts_ast_node_struct { + fts_ast_type_t type; /*!< The type of node */ + fts_ast_text_t text; /*!< Text node */ + fts_ast_term_t term; /*!< Term node */ + fts_ast_oper_t oper; /*!< Operator value */ + fts_ast_list_t list; /*!< Expression list */ 
+ fts_ast_node_t* next; /*!< Link for expr list */ + fts_ast_node_t* next_alloc; /*!< For tracking allocations */ +}; + +/* To track state during parsing */ +struct fts_ast_state_struct { + mem_heap_t* heap; /*!< Heap to use for alloc */ + fts_ast_node_t* root; /*!< If all goes OK, then this + will point to the root.*/ + + fts_ast_list_t list; /*!< List of nodes allocated */ + + fts_lexer_t* lexer; /*!< Lexer callback + arg */ +}; + +#endif /* INNOBASE_FST0AST_H */ diff --git a/storage/innobase/include/fts0blex.h b/storage/innobase/include/fts0blex.h new file mode 100644 index 00000000000..6f8d6eaeb29 --- /dev/null +++ b/storage/innobase/include/fts0blex.h @@ -0,0 +1,349 @@ +#ifndef fts0bHEADER_H +#define fts0bHEADER_H 1 +#define fts0bIN_HEADER 1 + +#line 6 "../include/fts0blex.h" + +#line 8 "../include/fts0blex.h" + +#define YY_INT_ALIGNED short int + +/* A lexical scanner generated by flex */ + +#define FLEX_SCANNER +#define YY_FLEX_MAJOR_VERSION 2 +#define YY_FLEX_MINOR_VERSION 5 +#define YY_FLEX_SUBMINOR_VERSION 35 +#if YY_FLEX_SUBMINOR_VERSION > 0 +#define FLEX_BETA +#endif + +/* First, we deal with platform-specific or compiler-specific issues. */ + +/* begin standard C headers. */ +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <stdlib.h> + +/* end standard C headers. */ + +/* flex integer type definitions */ + +#ifndef FLEXINT_H +#define FLEXINT_H + +/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */ + +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + +/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, + * if you want the limit (max/min) macros for int types. 
+ */ +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS 1 +#endif + +#include <inttypes.h> +typedef int8_t flex_int8_t; +typedef uint8_t flex_uint8_t; +typedef int16_t flex_int16_t; +typedef uint16_t flex_uint16_t; +typedef int32_t flex_int32_t; +typedef uint32_t flex_uint32_t; +#else +typedef signed char flex_int8_t; +typedef short int flex_int16_t; +typedef int flex_int32_t; +typedef unsigned char flex_uint8_t; +typedef unsigned short int flex_uint16_t; +typedef unsigned int flex_uint32_t; + +/* Limits of integral types. */ +#ifndef INT8_MIN +#define INT8_MIN (-128) +#endif +#ifndef INT16_MIN +#define INT16_MIN (-32767-1) +#endif +#ifndef INT32_MIN +#define INT32_MIN (-2147483647-1) +#endif +#ifndef INT8_MAX +#define INT8_MAX (127) +#endif +#ifndef INT16_MAX +#define INT16_MAX (32767) +#endif +#ifndef INT32_MAX +#define INT32_MAX (2147483647) +#endif +#ifndef UINT8_MAX +#define UINT8_MAX (255U) +#endif +#ifndef UINT16_MAX +#define UINT16_MAX (65535U) +#endif +#ifndef UINT32_MAX +#define UINT32_MAX (4294967295U) +#endif + +#endif /* ! C99 */ + +#endif /* ! FLEXINT_H */ + +#ifdef __cplusplus + +/* The "const" storage-class-modifier is valid. */ +#define YY_USE_CONST + +#else /* ! __cplusplus */ + +/* C99 requires __STDC__ to be defined as 1. */ +#if defined (__STDC__) + +#define YY_USE_CONST + +#endif /* defined (__STDC__) */ +#endif /* ! __cplusplus */ + +#ifdef YY_USE_CONST +#define yyconst const +#else +#define yyconst +#endif + +/* An opaque pointer. */ +#ifndef YY_TYPEDEF_YY_SCANNER_T +#define YY_TYPEDEF_YY_SCANNER_T +typedef void* yyscan_t; +#endif + +/* For convenience, these vars (plus the bison vars far below) + are macros in the reentrant scanner. 
*/ +#define yyin yyg->yyin_r +#define yyout yyg->yyout_r +#define yyextra yyg->yyextra_r +#define yyleng yyg->yyleng_r +#define yytext yyg->yytext_r +#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno) +#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column) +#define yy_flex_debug yyg->yy_flex_debug_r + +/* Size of default input buffer. */ +#ifndef YY_BUF_SIZE +#ifdef __ia64__ +/* On IA-64, the buffer size is 16k, not 8k. + * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case. + * Ditto for the __ia64__ case accordingly. + */ +#define YY_BUF_SIZE 32768 +#else +#define YY_BUF_SIZE 16384 +#endif /* __ia64__ */ +#endif + +#ifndef YY_TYPEDEF_YY_BUFFER_STATE +#define YY_TYPEDEF_YY_BUFFER_STATE +typedef struct yy_buffer_state *YY_BUFFER_STATE; +#endif + +#ifndef YY_TYPEDEF_YY_SIZE_T +#define YY_TYPEDEF_YY_SIZE_T +typedef size_t yy_size_t; +#endif + +#ifndef YY_STRUCT_YY_BUFFER_STATE +#define YY_STRUCT_YY_BUFFER_STATE +struct yy_buffer_state + { + FILE *yy_input_file; + + char *yy_ch_buf; /* input buffer */ + char *yy_buf_pos; /* current position in input buffer */ + + /* Size of input buffer in bytes, not including room for EOB + * characters. + */ + yy_size_t yy_buf_size; + + /* Number of characters read into yy_ch_buf, not including EOB + * characters. + */ + int yy_n_chars; + + /* Whether we "own" the buffer - i.e., we know we created it, + * and can realloc() it to grow it, and should free() it to + * delete it. + */ + int yy_is_our_buffer; + + /* Whether this is an "interactive" input source; if so, and + * if we're using stdio for input, then we want to use getc() + * instead of fread(), to make sure we stop fetching input after + * each newline. + */ + int yy_is_interactive; + + /* Whether we're considered to be at the beginning of a line. + * If so, '^' rules will be active on the next match, otherwise + * not. + */ + int yy_at_bol; + + int yy_bs_lineno; /**< The line count. */ + int yy_bs_column; /**< The column count. 
*/ + + /* Whether to try to fill the input buffer when we reach the + * end of it. + */ + int yy_fill_buffer; + + int yy_buffer_status; + + }; +#endif /* !YY_STRUCT_YY_BUFFER_STATE */ + +void fts0brestart (FILE *input_file ,yyscan_t yyscanner ); +void fts0b_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner ); +YY_BUFFER_STATE fts0b_create_buffer (FILE *file,int size ,yyscan_t yyscanner ); +void fts0b_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner ); +void fts0b_flush_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner ); +void fts0bpush_buffer_state (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner ); +void fts0bpop_buffer_state (yyscan_t yyscanner ); + +YY_BUFFER_STATE fts0b_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner ); +YY_BUFFER_STATE fts0b_scan_string (yyconst char *yy_str ,yyscan_t yyscanner ); +YY_BUFFER_STATE fts0b_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner ); + +void *fts0balloc (yy_size_t ,yyscan_t yyscanner ); +void *fts0brealloc (void *,yy_size_t ,yyscan_t yyscanner ); +void fts0bfree (void * ,yyscan_t yyscanner ); + +/* Begin user sect3 */ + +#define fts0bwrap(n) 1 +#define YY_SKIP_YYWRAP + +#define yytext_ptr yytext_r + +#ifdef YY_HEADER_EXPORT_START_CONDITIONS +#define INITIAL 0 + +#endif + +#ifndef YY_NO_UNISTD_H +/* Special case for "unistd.h", since it is non-ANSI. We include it way + * down here because we want the user's section 1 to have been scanned first. + * The user has a chance to override it with an option. + */ +#include <unistd.h> +#endif + +#ifndef YY_EXTRA_TYPE +#define YY_EXTRA_TYPE void * +#endif + +int fts0blex_init (yyscan_t* scanner); + +int fts0blex_init_extra (YY_EXTRA_TYPE user_defined,yyscan_t* scanner); + +/* Accessor methods to globals. + These are made visible to non-reentrant scanners for convenience. 
*/ + +int fts0blex_destroy (yyscan_t yyscanner ); + +int fts0bget_debug (yyscan_t yyscanner ); + +void fts0bset_debug (int debug_flag ,yyscan_t yyscanner ); + +YY_EXTRA_TYPE fts0bget_extra (yyscan_t yyscanner ); + +void fts0bset_extra (YY_EXTRA_TYPE user_defined ,yyscan_t yyscanner ); + +FILE *fts0bget_in (yyscan_t yyscanner ); + +void fts0bset_in (FILE * in_str ,yyscan_t yyscanner ); + +FILE *fts0bget_out (yyscan_t yyscanner ); + +void fts0bset_out (FILE * out_str ,yyscan_t yyscanner ); + +int fts0bget_leng (yyscan_t yyscanner ); + +char *fts0bget_text (yyscan_t yyscanner ); + +int fts0bget_lineno (yyscan_t yyscanner ); + +void fts0bset_lineno (int line_number ,yyscan_t yyscanner ); + +/* Macros after this point can all be overridden by user definitions in + * section 1. + */ + +#ifndef YY_SKIP_YYWRAP +#ifdef __cplusplus +extern "C" int fts0bwrap (yyscan_t yyscanner ); +#else +extern int fts0bwrap (yyscan_t yyscanner ); +#endif +#endif + +#ifndef yytext_ptr +static void yy_flex_strncpy (char *,yyconst char *,int ,yyscan_t yyscanner); +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen (yyconst char * ,yyscan_t yyscanner); +#endif + +#ifndef YY_NO_INPUT + +#endif + +/* Amount of stuff to slurp up with each read. */ +#ifndef YY_READ_BUF_SIZE +#ifdef __ia64__ +/* On IA-64, the buffer size is 16k, not 8k */ +#define YY_READ_BUF_SIZE 16384 +#else +#define YY_READ_BUF_SIZE 8192 +#endif /* __ia64__ */ +#endif + +/* Number of entries by which start-condition stack grows. */ +#ifndef YY_START_STACK_INCR +#define YY_START_STACK_INCR 25 +#endif + +/* Default declaration of generated scanner - a define so the user can + * easily add parameters. 
+ */ +#ifndef YY_DECL +#define YY_DECL_IS_OURS 1 + +extern int fts0blex (yyscan_t yyscanner); + +#define YY_DECL int fts0blex (yyscan_t yyscanner) +#endif /* !YY_DECL */ + +/* yy_get_previous_state - get the state just before the EOB char was reached */ + +#undef YY_NEW_FILE +#undef YY_FLUSH_BUFFER +#undef yy_set_bol +#undef yy_new_buffer +#undef yy_set_interactive +#undef YY_DO_BEFORE_ACTION + +#ifdef YY_DECL_IS_OURS +#undef YY_DECL_IS_OURS +#undef YY_DECL +#endif + +#line 73 "fts0blex.l" + + +#line 348 "../include/fts0blex.h" +#undef fts0bIN_HEADER +#endif /* fts0bHEADER_H */ diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h new file mode 100644 index 00000000000..e515772bdbd --- /dev/null +++ b/storage/innobase/include/fts0fts.h @@ -0,0 +1,1031 @@ +/***************************************************************************** + +Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/fts0fts.h +Full text search header file + +Created 2011/09/02 Sunny Bains +***********************************************************************/ + +#ifndef fts0fts_h +#define fts0fts_h + +#include "univ.i" + +#include "data0type.h" +#include "data0types.h" +#include "dict0types.h" +#include "hash0hash.h" +#include "mem0mem.h" +#include "rem0types.h" +#include "row0types.h" +#include "trx0types.h" +#include "ut0vec.h" +#include "ut0rbt.h" +#include "ut0wqueue.h" +#include "que0types.h" +#include "ft_global.h" + +/** "NULL" value of a document id. */ +#define FTS_NULL_DOC_ID 0 + +/** FTS hidden column that is used to map to and from the row */ +#define FTS_DOC_ID_COL_NAME "FTS_DOC_ID" + +/** The name of the index created by FTS */ +#define FTS_DOC_ID_INDEX_NAME "FTS_DOC_ID_INDEX" + +#define FTS_DOC_ID_INDEX_NAME_LEN 16 + +/** Doc ID is an 8 byte value */ +#define FTS_DOC_ID_LEN 8 + +/** The number of fields to sort when we build FT index with +FIC. Three fields are sorted: (word, doc_id, position) */ +#define FTS_NUM_FIELDS_SORT 3 + +/** Maximum number of rows in a table, smaller than which, we will +optimize using a 4 byte Doc ID for FIC merge sort to reduce sort size */ +#define MAX_DOC_ID_OPT_VAL 1073741824 + +/** Document id type. */ +typedef ib_uint64_t doc_id_t; + +/** doc_id_t printf format */ +#define FTS_DOC_ID_FORMAT IB_ID_FMT + +/** Convert document id to the InnoDB (BIG ENDIAN) storage format. */ +#define fts_write_doc_id(d, s) mach_write_to_8(d, s) + +/** Read a document id to internal format. 
*/ +#define fts_read_doc_id(s) mach_read_from_8(s) + +/** Bind the doc id to a variable */ +#define fts_bind_doc_id(i, n, v) pars_info_bind_int8_literal(i, n, v) + +/** Defines for FTS query mode, they have the same values as +those defined in mysql file ft_global.h */ +#define FTS_NL 0 +#define FTS_BOOL 1 +#define FTS_SORTED 2 +#define FTS_EXPAND 4 +#define FTS_PROXIMITY 8 +#define FTS_PHRASE 16 + +#define FTS_INDEX_TABLE_IND_NAME "FTS_INDEX_TABLE_IND" + +/** Threshold where our optimize thread automatically kicks in */ +#define FTS_OPTIMIZE_THRESHOLD 10000000 + +#define FTS_DOC_ID_MAX_STEP 10000 +/** Variable specifying the FTS parallel sort degree */ +extern ulong fts_sort_pll_degree; + +/** Variable specifying the number of word to optimize for each optimize table +call */ +extern ulong fts_num_word_optimize; + +/** Variable specifying whether we do additional FTS diagnostic printout +in the log */ +extern char fts_enable_diag_print; + +/** FTS rank type, which will be between 0 .. 1 inclusive */ +typedef float fts_rank_t; + +/** Type of a row during a transaction. FTS_NOTHING means the row can be +forgotten from the FTS system's POV, FTS_INVALID is an internal value used +to mark invalid states. + +NOTE: Do not change the order or value of these, fts_trx_row_get_new_state +depends on them being exactly as they are. */ +typedef enum { + FTS_INSERT = 0, + FTS_MODIFY, + FTS_DELETE, + FTS_NOTHING, + FTS_INVALID +} fts_row_state; + +/** The FTS table types. 
*/ +enum fts_table_type_enum { + FTS_INDEX_TABLE, /*!< FTS auxiliary table that is + specific to a particular FTS index + on a table */ + + FTS_COMMON_TABLE /*!< FTS auxiliary table that is common + for all FTS indexes on a table */ +}; + +typedef struct fts_struct fts_t; +typedef struct fts_doc_struct fts_doc_t; +typedef struct fts_trx_struct fts_trx_t; +typedef struct fts_table_struct fts_table_t; +typedef struct fts_cache_struct fts_cache_t; +typedef struct fts_token_struct fts_token_t; +typedef struct fts_string_struct fts_string_t; +typedef struct fts_result_struct fts_result_t; +typedef struct fts_ranking_struct fts_ranking_t; +typedef struct fts_trx_row_struct fts_trx_row_t; +typedef struct fts_doc_ids_struct fts_doc_ids_t; +typedef enum fts_table_type_enum fts_table_type_t; +typedef struct fts_trx_table_struct fts_trx_table_t; +typedef struct fts_savepoint_struct fts_savepoint_t; +typedef struct fts_index_cache_struct fts_index_cache_t; + + +/** Initialize the "fts_table" for internal query into FTS auxiliary +tables. Sets suffix, type, table_id, parent and table from the arguments; +index_id and charset are not assigned by this macro. +Every macro parameter is parenthesized in the expansion so that arguments +such as "*pp" or "c ? a : b" bind as intended. fts_table and m_table are +evaluated more than once, so pass expressions without side effects. +NOTE(review): the expansion ends in "while (0);". The trailing semicolon is +kept because the call sites are not visible from this header, but it makes +the macro unusable as the body of an unbraced if that has an else. */ +#define FTS_INIT_FTS_TABLE(fts_table, m_suffix, m_type, m_table)\ +do { \ + (fts_table)->suffix = (m_suffix); \ + (fts_table)->type = (m_type); \ + (fts_table)->table_id = (m_table)->id; \ + (fts_table)->parent = (m_table)->name; \ + (fts_table)->table = (m_table); \ +} while (0); + +/** Initialize the "fts_table" from an index: same assignments as +FTS_INIT_FTS_TABLE, taking the table from m_index->table, and additionally +sets index_id from m_index->id. charset is not assigned by this macro. +fts_table and m_index are evaluated more than once. The trailing semicolon +caveat of FTS_INIT_FTS_TABLE applies here as well. */ +#define FTS_INIT_INDEX_TABLE(fts_table, m_suffix, m_type, m_index)\ +do { \ + (fts_table)->suffix = (m_suffix); \ + (fts_table)->type = (m_type); \ + (fts_table)->table_id = (m_index)->table->id; \ + (fts_table)->parent = (m_index)->table->name; \ + (fts_table)->table = (m_index)->table; \ + (fts_table)->index_id = (m_index)->id; \ +} while (0); + +/** Information about changes in a single transaction affecting +the FTS system.
*/ +struct fts_trx_struct { + trx_t* trx; /*!< InnoDB transaction */ + + ib_vector_t* savepoints; /*!< Active savepoints, must have at + least one element, the implied + savepoint */ + ib_vector_t* last_stmt; /*!< last_stmt: presumably the savepoint + data of the last statement - confirm */ + + mem_heap_t* heap; /*!< memory heap */ +}; + +/** Information required for transaction savepoint handling. */ +struct fts_savepoint_struct { + char* name; /*!< First entry is always NULL, the + default instance. Otherwise the name + of the savepoint */ + + ib_rbt_t* tables; /*!< Modified FTS tables */ +}; + +/** Information about changed rows in a transaction for a single table. */ +struct fts_trx_table_struct { + dict_table_t* table; /*!< table */ + + fts_trx_t* fts_trx; /*!< link to parent */ + + ib_rbt_t* rows; /*!< rows changed; indexed by doc-id, + cells are fts_trx_row_t* */ + + fts_doc_ids_t* added_doc_ids; /*!< list of added doc ids (NULL until + the first addition) */ + + /** query graph for adding doc ids */ + que_t* docs_added_graph; +}; + +/** Information about one changed row in a transaction. */ +struct fts_trx_row_struct { + doc_id_t doc_id; /*!< Id of the ins/upd/del document */ + + fts_row_state state; /*!< state of the row */ + + ib_vector_t* fts_indexes; /*!< The indexes that are affected */ +}; + +/** List of document ids that were added during a transaction. This +list is passed on to a background 'Add' thread and OPTIMIZE, so it +needs its own memory heap. */ +struct fts_doc_ids_struct { + ib_vector_t* doc_ids; /*!< document ids (each element is + of type doc_id_t). */ + + ib_alloc_t* self_heap; /*!< Allocator used to create an + instance of this type and the + doc_ids vector */ +}; + +// FIXME: Get rid of this if possible. +/** Since MySQL's character set support for Unicode is woefully inadequate +(it supports basic operations like isalpha etc. only for 8-bit characters), +we have to implement our own. We use UTF-16 without surrogate processing +as our in-memory format. This typedef is a single such character.
*/ +typedef unsigned short ib_uc_t; + +/** A UTF-16 or UTF-8 string. */ +struct fts_string_struct { + byte* f_str; /*!< string, not necessarily terminated + in any way */ + ulint f_len; /*!< Length of the string in bytes */ + ulint f_n_char; /*!< Number of characters */ +}; + +/** Query ranked doc ids. */ +struct fts_ranking_struct { + doc_id_t doc_id; /*!< Document id */ + + fts_rank_t rank; /*!< Rank is between 0 .. 1 */ + + ib_rbt_t* words; /*!< RB Tree of type byte*, this + contains the words that were queried + and found in this document */ +}; + +/** Query result. */ +struct fts_result_struct { + ib_rbt_node_t* current; /*!< Current element */ + + ib_rbt_t* rankings_by_id; /*!< RB tree of type fts_ranking_t + indexed by doc id */ + ib_rbt_t* rankings_by_rank;/*!< RB tree of type fts_ranking_t + indexed by rank */ +}; + +/** This is used to generate the FTS auxiliary table name; we need the +table id and the index id to generate the column specific FTS auxiliary +table name. */ +struct fts_table_struct { + const char* parent; /*!< Parent table name, this is + required only for the database + name */ + + fts_table_type_t + type; /*!< The auxiliary table type */ + + table_id_t table_id; /*!< The table id */ + + index_id_t index_id; /*!< The index id */ + + const char* suffix; /*!< The suffix of the fts auxiliary + table name, can be NULL, not used + everywhere (yet) */ + const dict_table_t* + table; /*!< Parent table */ + CHARSET_INFO* charset; /*!< charset info if it is for FTS + index auxiliary table */ +}; + +enum fts_status { + BG_THREAD_STOP = 1, /*!< TRUE if the FTS background thread + has finished reading the ADDED table, + meaning more items can be added to + the table.
*/ + + BG_THREAD_READY = 2, /*!< TRUE if the FTS background thread + is ready */ + + ADD_THREAD_STARTED = 4, /*!< TRUE if the FTS add thread + has started */ + + ADDED_TABLE_SYNCED = 8, /*!< TRUE if the ADDED table record is + sync-ed after crash recovery */ + + TABLE_DICT_LOCKED = 16 /*!< Set if the table has + dict_sys->mutex */ +}; + +typedef enum fts_status fts_status_t; + +/** The state of the FTS sub system. */ +struct fts_struct { + /** mutex protecting bg_threads* and + fts_add_wq (presumably the add_wq member below). */ + mutex_t bg_threads_mutex; + + ulint bg_threads; /*!< number of background threads + accessing this table */ + + /* NOTE(review): stray comment that is not attached to any member: + "TRUE if background threads running should stop themselves" */ + ulint fts_status; /*!< Status bits regarding fts + running state (presumably a + combination of fts_status_t values) */ + + ib_wqueue_t* add_wq; /*!< Work queue for scheduling jobs + for the FTS 'Add' thread, or NULL + if the thread has not yet been + created. Each work item is a + fts_trx_doc_ids_t*. */ + + fts_cache_t* cache; /*!< FTS memory buffer for this table, + or NULL if the table has no FTS + index. */ + + ulint doc_col; /*!< FTS doc id hidden column number + in the CLUSTERED index. */ + + ib_vector_t* indexes; /*!< Vector of FTS indexes, this is + mainly for caching purposes. */ + mem_heap_t* fts_heap; /*!< heap for fts_struct allocation */ +}; + +typedef struct fts_stopword_struct fts_stopword_t; + +/** status bits for fts_stopword_t status field.
*/ +#define STOPWORD_NOT_INIT 0x1 +#define STOPWORD_OFF 0x2 +#define STOPWORD_FROM_DEFAULT 0x4 +#define STOPWORD_USER_TABLE 0x8 + +extern const char* fts_default_stopword[]; + +/** Variable specifying the maximum FTS cache size for each table */ +extern ulong fts_max_cache_size; + +/** Variable specifying the maximum FTS token size */ +extern ulong fts_max_token_size; + +/** Variable specifying the minimum FTS token size */ +extern ulong fts_min_token_size; + +/** Maximum possible Fulltext word length. The expansion is parenthesized +so that the macro can be used inside larger expressions (for example as the +right operand of / or %) without changing their meaning. */ +#define FTS_MAX_WORD_LEN (3 * HA_FT_MAXCHARLEN) + +/** Variable specifying the table that has Fulltext index to display its +content through information schema table */ +extern char* fts_internal_tbl_name; + +/** Free a query graph with que_graph_free() while holding dict_sys->mutex; +the mutex is acquired before and released after the call. */ +#define fts_que_graph_free(graph) \ +do { \ + mutex_enter(&dict_sys->mutex); \ + que_graph_free(graph); \ + mutex_exit(&dict_sys->mutex); \ +} while (0) + +/******************************************************************//** +Create a FTS cache. */ +UNIV_INTERN +fts_cache_t* +fts_cache_create( +/*=============*/ + dict_table_t* table); /*!< table owns the FTS cache */ + +/******************************************************************//** +Create a FTS index cache. +@return Index Cache */ +UNIV_INTERN +fts_index_cache_t* +fts_cache_index_cache_create( +/*=========================*/ + dict_table_t* table, /*!< in: table with FTS index */ + dict_index_t* index); /*!< in: FTS index */ + +/******************************************************************//** +Get the next available document id. This function creates a new +transaction to generate the document id. */ +UNIV_INTERN +ulint +fts_get_next_doc_id( +/*================*/ + /*!< out: DB_SUCCESS if OK */ + const dict_table_t* table, /*!< in: table */ + doc_id_t* doc_id); /*!< out: new document id */ + +/*********************************************************************//** +Update the next and last Doc ID in the CONFIG table to be the input +"doc_id" value (+ 1).
We would do so after each FTS index build or +table truncate */ +UNIV_INTERN +void +fts_update_next_doc_id( +/*===================*/ + const dict_table_t* table, /*!< in: table */ + const char* table_name, /*!< in: table name */ + doc_id_t doc_id); /*!< in: DOC ID to set */ + +/******************************************************************//** +Update the last document id. This function could create a new +transaction to update the last document id. +@return DB_SUCCESS if OK */ +UNIV_INTERN +ulint +fts_update_sync_doc_id( +/*===================*/ + /*!< out: DB_SUCCESS if OK */ + const dict_table_t* table, /*!< in: table */ + const char* table_name, /*!< in: table name */ + doc_id_t doc_id, /*!< in: last document id */ + trx_t* trx); /*!< in: update trx */ + +/******************************************************************//** +Create a new document id. +@return DB_SUCCESS if all went well else error */ +UNIV_INTERN +ulint +fts_create_doc_id( +/*==============*/ + dict_table_t* table, /*!< in: row is of this + table. */ + dtuple_t* row, /*!< in/out: add doc id + value to this row. This is the + current row that is being + inserted. */ + mem_heap_t* heap); /*!< in: heap */ + +/******************************************************************//** +Create a new fts_doc_ids_t. +@return new fts_doc_ids_t. */ +UNIV_INTERN +fts_doc_ids_t* +fts_doc_ids_create(void); +/*=====================*/ + +/******************************************************************//** +Free a fts_doc_ids_t. */ +UNIV_INTERN +void +fts_doc_ids_free( +/*=============*/ + fts_doc_ids_t* doc_ids); /*!< in: doc_ids to free */ + +/******************************************************************//** +Notify the FTS system about an operation on an FTS-indexed table.
*/ +UNIV_INTERN +void +fts_trx_add_op( +/*===========*/ + trx_t* trx, /*!< in: InnoDB transaction; declared + nonnull */ + dict_table_t* table, /*!< in: table; declared nonnull */ + doc_id_t doc_id, /*!< in: doc id */ + fts_row_state state, /*!< in: state of the row */ + ib_vector_t* fts_indexes) /*!< in: FTS indexes affected + (NULL=all) */ + __attribute__((nonnull(1,2))); + +/******************************************************************//** +Free an FTS trx. */ +UNIV_INTERN +void +fts_trx_free( +/*=========*/ + fts_trx_t* fts_trx); /*!< in, own: FTS trx */ + +/******************************************************************//** +Creates the common ancillary tables needed for supporting an FTS index +on the given table. row_mysql_lock_data_dictionary() must have been +called before this. +@return DB_SUCCESS or error code */ +UNIV_INTERN +ulint +fts_create_common_tables( +/*=====================*/ + trx_t* trx, /*!< in: transaction handle */ + const dict_table_t* + table, /*!< in: table with one FTS + index */ + const char* name, /*!< in: table name */ + ibool skip_doc_id_index); + /*!< in: Skip index on doc id */ +/******************************************************************//** +Wrapper function of fts_create_index_tables_low(); creates the auxiliary +tables for an FTS index. +@return DB_SUCCESS or error code */ +UNIV_INTERN +ulint +fts_create_index_tables( +/*====================*/ + trx_t* trx, /*!< in: transaction handle */ + const dict_index_t* index); /*!< in: the FTS index + instance */ + +/******************************************************************//** +Creates the column specific ancillary tables needed for supporting an +FTS index on the given table. row_mysql_lock_data_dictionary must have +been called before this.
+@return DB_SUCCESS or error code */ +UNIV_INTERN +ulint +fts_create_index_tables_low( +/*========================*/ + trx_t* trx, /*!< in: transaction handle */ + const dict_index_t* + index, /*!< in: the FTS index + instance */ + const char* table_name, /*!< in: the table name */ + table_id_t table_id); /*!< in: the table id */ + +/******************************************************************//** +Add the FTS document id hidden column. */ +UNIV_INTERN +void +fts_add_doc_id_column( +/*==================*/ + dict_table_t* table); /*!< in/out: Table with + FTS index */ + +/*********************************************************************//** +Drops the ancillary tables needed for supporting an FTS index on the +given table. row_mysql_lock_data_dictionary() must have been called before +this. +@return DB_SUCCESS or error code */ +UNIV_INTERN +ulint +fts_drop_tables( +/*============*/ + trx_t* trx, /*!< in: transaction */ + dict_table_t* table); /*!< in: table that has the FTS + index */ + +/******************************************************************//** +The given transaction is about to be committed; do whatever is necessary +from the FTS system's point of view. +@return DB_SUCCESS or error code */ +UNIV_INTERN +ulint +fts_commit( +/*=======*/ + trx_t* trx); /*!< in: transaction */ + +/*******************************************************************//** +FTS Query entry point.
+@return DB_SUCCESS if successful otherwise error code */ +UNIV_INTERN +ulint +fts_query( +/*======*/ + trx_t* trx, /*!< in: transaction */ + dict_index_t* index, /*!< in: FTS index to search */ + uint flags, /*!< in: FTS search mode */ + const byte* query, /*!< in: FTS query */ + ulint query_len, /*!< in: FTS query string len + in bytes */ + fts_result_t** result); /*!< out: query result, to be + freed by the caller (see + fts_query_free_result()). */ + +/******************************************************************//** +Retrieve the FTS Relevance Ranking result for the document with doc_id. +@return the relevance ranking value. */ +UNIV_INTERN +float +fts_retrieve_ranking( +/*=================*/ + fts_result_t* result, /*!< in: FTS result structure */ + doc_id_t doc_id); /*!< in: doc id of the document + of interest */ + +/******************************************************************//** +Sort the result returned by fts_query() on fts_ranking_t::rank. */ +UNIV_INTERN +void +fts_query_sort_result_on_rank( +/*==========================*/ + fts_result_t* result); /*!< in/out: result instance + to sort. */ + +/******************************************************************//** +Free the result returned by fts_query(). */ +UNIV_INTERN +void +fts_query_free_result( +/*==================*/ + fts_result_t* result); /*!< in: result instance + to free. */ + +/******************************************************************//** +Extract the doc id from the FTS hidden column. */ +UNIV_INTERN +doc_id_t +fts_get_doc_id_from_row( +/*====================*/ + dict_table_t* table, /*!< in: table */ + dtuple_t* row); /*!< in: row whose FTS doc id we + want to extract. */ + +/******************************************************************//** +Extract the doc id from the FTS hidden column.
*/ +UNIV_INTERN +doc_id_t +fts_get_doc_id_from_rec( +/*====================*/ + dict_table_t* table, /*!< in: table */ + const rec_t* rec, /*!< in: record */ + mem_heap_t* heap); /*!< in: heap */ + +/******************************************************************//** +Update the query graph with a new document id. +@return Doc ID used */ +UNIV_INTERN +doc_id_t +fts_update_doc_id( +/*==============*/ + dict_table_t* table, /*!< in: table */ + upd_field_t* ufield, /*!< out: update node */ + doc_id_t* next_doc_id); /*!< out: buffer for writing */ + +/******************************************************************//** +Initialize FTS. */ +UNIV_INTERN +void +fts_startup(void); +/*==============*/ + +/******************************************************************//** +Signal FTS threads to initiate shutdown. */ +UNIV_INTERN +void +fts_start_shutdown( +/*===============*/ + dict_table_t* table, /*!< in: table with FTS + indexes */ + fts_t* fts); /*!< in: fts instance to + shut down */ + +/******************************************************************//** +Wait for FTS threads to shut down. */ +UNIV_INTERN +void +fts_shutdown( +/*=========*/ + dict_table_t* table, /*!< in: table with FTS + indexes */ + fts_t* fts); /*!< in: fts instance to + shut down */ + +/******************************************************************//** +Create an instance of fts_t. +@return instance of fts_t */ +UNIV_INTERN +fts_t* +fts_create( +/*=======*/ + dict_table_t* table); /*!< in/out: table with FTS + indexes (originally marked + "out" only) */ + +/**********************************************************************//** +Free the FTS resources. */ +UNIV_INTERN +void +fts_free( +/*=====*/ + dict_table_t* table); /*!< in/out: table with + FTS indexes */ + +/*********************************************************************//** +Run OPTIMIZE on the given table.
+@return DB_SUCCESS if all OK */ +UNIV_INTERN +ulint +fts_optimize_table( +/*===============*/ + dict_table_t* table); /*!< in: table to optimize */ + +/**********************************************************************//** +Start up the optimize thread and create the work queue. */ +UNIV_INTERN +void +fts_optimize_init(void); +/*====================*/ + +/**********************************************************************//** +Check whether the work queue is initialized. +@return TRUE if optimize queue is initialized. */ +UNIV_INTERN +ibool +fts_optimize_is_init(void); +/*======================*/ + +/****************************************************************//** +Drops index ancillary tables for an FTS index +@return DB_SUCCESS or error code */ +UNIV_INTERN +ulint +fts_drop_index_tables( +/*==================*/ + trx_t* trx, /*!< in: transaction */ + dict_index_t* index); /*!< in: Index to drop */ + +/******************************************************************//** +Remove the table from the OPTIMIZER's list. We do wait for +acknowledgement from the consumer of the message. */ +UNIV_INTERN +void +fts_optimize_remove_table( +/*======================*/ + dict_table_t* table); /*!< in: table to remove */ + +/**********************************************************************//** +Signal the optimize thread to prepare for shutdown. */ +UNIV_INTERN +void +fts_optimize_start_shutdown(void); +/*==============================*/ + +/**********************************************************************//** +Inform optimize to clean up. */ +UNIV_INTERN +void +fts_optimize_end(void); +/*===================*/ + +/**********************************************************************//** +Take an FTS savepoint. No value is returned (the function is void). */
+UNIV_INTERN +void +fts_savepoint_take( +/*===============*/ + trx_t* trx, /*!< in: transaction */ + const char* name); /*!< in: savepoint name */ + +/**********************************************************************//** +Refresh last statement savepoint. No value is returned (the function is +void). */ +UNIV_INTERN +void +fts_savepoint_laststmt_refresh( +/*===========================*/ + trx_t* trx); /*!< in: transaction */ + +/**********************************************************************//** +Release the savepoint data identified by name. */ +UNIV_INTERN +void +fts_savepoint_release( +/*==================*/ + trx_t* trx, /*!< in: transaction */ + const char* name); /*!< in: savepoint name */ + +/**********************************************************************//** +Free the FTS cache. */ +UNIV_INTERN +void +fts_cache_destroy( +/*==============*/ + fts_cache_t* cache); /*!< in: cache */ + +/*********************************************************************//** +Clear cache. If the shutdown flag (presumably the free_words parameter) is +TRUE then the cache can contain data that needs to be freed. For regular +clear as part of normal working we assume the caller has freed all +resources. */ +UNIV_INTERN +void +fts_cache_clear( +/*============*/ + fts_cache_t* cache, /*!< in: cache */ + ibool free_words); /*!< in: TRUE to free the + in-memory word cache. */ + +/*********************************************************************//** +Initialize things in cache. */ +UNIV_INTERN +void +fts_cache_init( +/*===========*/ + fts_cache_t* cache); /*!< in: cache */ + +/*********************************************************************//** +Rollback to and including savepoint identified by name.
*/ +UNIV_INTERN +void +fts_savepoint_rollback( +/*===================*/ + trx_t* trx, /*!< in: transaction */ + const char* name); /*!< in: savepoint name */ + +/*********************************************************************//** +Rollback the last statement. NOTE(review): the previous text here was a copy +of the fts_savepoint_rollback() description and referred to a savepoint +name, which this function does not take; presumably it rolls back to the +last statement savepoint - confirm against the definition. */ +UNIV_INTERN +void +fts_savepoint_rollback_last_stmt( +/*=============================*/ + trx_t* trx); /*!< in: transaction */ + +/***********************************************************************//** +Drop all orphaned FTS auxiliary tables, those that don't have a parent +table or FTS index defined on them. */ +UNIV_INTERN +void +fts_drop_orphaned_tables(void); +/*==========================*/ + +/******************************************************************//** +Since we do a horizontal split on the index table, we need to drop +all the split tables. +@return DB_SUCCESS or error code */ +UNIV_INTERN +ulint +fts_drop_index_split_tables( +/*========================*/ + /*!< out: DB_SUCCESS + or error code */ + trx_t* trx, /*!< in: transaction */ + dict_index_t* index); /*!< in: FTS index */ + +/****************************************************************//** +Run SYNC on the table, i.e., write out data from the cache to the +FTS auxiliary INDEX table and clear the cache at the end. +@return DB_SUCCESS if all OK */ +UNIV_INTERN +ulint +fts_sync_table( +/*===========*/ + dict_table_t* table); /*!< in: table */ + +/****************************************************************//** +Free the query graph but check whether dict_sys->mutex is already +held */ +UNIV_INTERN +void +fts_que_graph_free_check_lock( +/*==========================*/ + fts_table_t* fts_table, /*!< in: FTS table */ + const fts_index_cache_t*index_cache, /*!< in: FTS index cache */ + que_t* graph); /*!< in: query graph */ + +/****************************************************************//** +Get the CHARSET_INFO for an FTS index.
*/ +UNIV_INTERN +CHARSET_INFO* +fts_index_get_charset( +/*==================*/ + dict_index_t* index); /*!< in: FTS index */ + +/*********************************************************************//** +Get the initial Doc ID by consulting the ADDED and the CONFIG tables +@return initial Doc ID */ +UNIV_INTERN +doc_id_t +fts_init_doc_id( +/*============*/ + const dict_table_t* table); /*!< in: table */ + +/******************************************************************//** +Compare two character strings according to their charset. */ +extern +int +innobase_fts_text_cmp( +/*==================*/ + const void* cs, /*!< in: Character set */ + const void* p1, /*!< in: key */ + const void* p2); /*!< in: node */ + +/******************************************************************//** +Makes all characters in a string lower case. */ +extern +size_t +innobase_fts_casedn_str( +/*====================*/ + CHARSET_INFO* cs, /*!< in: Character set */ + char* src, /*!< in: string to put in + lower case */ + size_t src_len, /*!< in: input string length */ + char* dst, /*!< out: buffer for result + string */ + size_t dst_len); /*!< in: buffer size */ + + +/******************************************************************//** +Compare two character strings according to their charset (presumably a +prefix comparison, going by the function name - confirm). */ +extern +int +innobase_fts_text_cmp_prefix( +/*=========================*/ + const void* cs, /*!< in: Character set */ + const void* p1, /*!< in: key */ + const void* p2); /*!< in: node */ + +/*************************************************************//** +Get the next token from the given string and store it in *token.
*/ +extern +ulint +innobase_mysql_fts_get_token( +/*=========================*/ + CHARSET_INFO* charset, /*!< in: Character set */ + byte* start, /*!< in: start of text */ + byte* end, /*!< in: one character past + end of text */ + fts_string_t* token, /*!< out: token's text */ + ulint* offset); /*!< out: offset to token, + measured as characters from + 'start' */ + +/*********************************************************************//** +Fetch COUNT(*) from the specified table. +@return the number of rows in the table */ +UNIV_INTERN +ulint +fts_get_rows_count( +/*===============*/ + fts_table_t* fts_table); /*!< in: fts table to read */ + +/*************************************************************//** +Get the maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists +@return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */ +UNIV_INTERN +doc_id_t +fts_get_max_doc_id( +/*===============*/ + dict_table_t* table); /*!< in: user table */ + +/******************************************************************//** +Check whether user supplied stopword table exists and is of +the right format.
+@return TRUE if the table qualifies */ +UNIV_INTERN +ibool +fts_valid_stopword_table( +/*=====================*/ + const char* stopword_table_name); /*!< in: Stopword table + name */ +/****************************************************************//** +This function loads the specified stopwords into the FTS cache +@return TRUE if success */ +UNIV_INTERN +ibool +fts_load_stopword( +/*==============*/ + const dict_table_t* + table, /*!< in: Table with FTS */ + trx_t* trx, /*!< in: Transaction */ + const char* global_stopword_table, /*!< in: Global stopword table + name */ + const char* session_stopword_table, /*!< in: Session stopword table + name */ + ibool stopword_is_on, /*!< in: Whether stopword + option is turned on/off */ + ibool reload); /*!< in: Whether it is during + reload of FTS table */ + +/****************************************************************//** +Create the vector of fts_get_doc_t instances. +@return vector of fts_get_doc_t instances */ +UNIV_INTERN +ib_vector_t* +fts_get_docs_create( +/*================*/ + fts_cache_t* cache); /*!< in: fts cache */ + +/****************************************************************//** +Read the rows from the FTS index +@return NOTE(review): originally documented as "vector of rows fetched", +but the declared return type is ulint; presumably an error code such as +DB_SUCCESS, with the doc ids collected into doc_ids - confirm */ +UNIV_INTERN +ulint +fts_table_fetch_doc_ids( +/*====================*/ + trx_t* trx, /*!< in: transaction */ + fts_table_t* fts_table, /*!< in: aux table */ + fts_doc_ids_t* doc_ids); /*!< in: For collecting + doc ids */ +/****************************************************************//** +This function loads the documents in "ADDED" table into FTS cache, +it also loads the stopword info into the FTS cache.
+@return NOTE(review): originally documented as "DB_SUCCESS if all OK", but +the declared return type is ibool; presumably TRUE if all OK - confirm */ +UNIV_INTERN +ibool +fts_init_index( +/*===========*/ + dict_table_t* table, /*!< in: Table with FTS */ + ibool has_cache_lock); /*!< in: Whether we already + have cache lock */ +/*******************************************************************//** +Add a newly created index to the FTS cache */ +UNIV_INTERN +void +fts_add_index( +/*==========*/ + dict_index_t* index, /*!< FTS index to be added */ + dict_table_t* table); /*!< table */ + +/*******************************************************************//** +Drop auxiliary tables related to an FTS index +@return DB_SUCCESS or error number */ +UNIV_INTERN +ulint +fts_drop_index( +/*===========*/ + dict_table_t* table, /*!< in: Table where indexes are dropped */ + dict_index_t* index, /*!< in: Index to be dropped */ + trx_t* trx); /*!< in: Transaction for the drop */ + +/*******************************************************************//** +Check that the indexes in fts->indexes are also present in the index cache +and in the table->indexes list +@return TRUE if all indexes match */ +UNIV_INTERN +ibool +fts_check_cached_index( +/*===================*/ + dict_table_t* table); /*!< in: Table where indexes are dropped */ +#endif /* fts0fts_h */ + diff --git a/storage/innobase/include/fts0opt.h b/storage/innobase/include/fts0opt.h new file mode 100644 index 00000000000..92eaf8270d2 --- /dev/null +++ b/storage/innobase/include/fts0opt.h @@ -0,0 +1,37 @@ +/***************************************************************************** + +Copyright (c) 2001, 2011, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/fts0opt.h +Full Text Search optimize thread + +Created 2011-02-15 Jimmy Yang +***********************************************************************/ +#ifndef INNODB_FTS0OPT_H +#define INNODB_FTS0OPT_H + +/******************************************************************** +Callback function to fetch the rows in an FTS INDEX record. */ +UNIV_INTERN +ibool +fts_optimize_index_fetch_node( +/*==========================*/ + /* out: always returns non-NULL (the return type is ibool, so presumably a non-zero/TRUE value) */ + void* row, /* in: sel_node_t* */ + void* user_arg); /* in: pointer to ib_vector_t */ +#endif /* INNODB_FTS0OPT_H */ diff --git a/storage/innobase/include/fts0pars.h b/storage/innobase/include/fts0pars.h new file mode 100644 index 00000000000..ae5a55b2455 --- /dev/null +++ b/storage/innobase/include/fts0pars.h @@ -0,0 +1,74 @@ + +/* A Bison parser, made by GNU Bison 2.4.1. */ + +/* Skeleton interface for Bison's Yacc-like parsers in C + + Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006 + Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details.
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* As a special exception, you may create a larger work that contains + part or all of the Bison parser skeleton and distribute that work + under terms of your choice, so long as that work isn't itself a + parser generator using the skeleton or a modified version thereof + as a parser skeleton. Alternatively, if you modify or redistribute + the parser skeleton itself, you may (at your option) remove this + special exception, which will cause the skeleton and the resulting + Bison output files to be licensed under the GNU General Public + License without this special exception. + + This special exception was added by the Free Software Foundation in + version 2.2 of Bison. */ + + +/* Tokens. */ +#ifndef YYTOKENTYPE +# define YYTOKENTYPE + /* Put the tokens into the symbol table, so that GDB and other debuggers + know about them. */ + enum yytokentype { + FTS_OPER = 258, + FTS_TEXT = 259, + FTS_TERM = 260, + FTS_NUMB = 261 + }; +#endif + + + +#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED +typedef union YYSTYPE +{ + +/* Line 1676 of yacc.c */ +#line 36 "fts0pars.y" + + int oper; + char* token; + fts_ast_node_t* node; + + + +/* Line 1676 of yacc.c */ +#line 66 "fts0pars.h" +} YYSTYPE; +# define YYSTYPE_IS_TRIVIAL 1 +# define yystype YYSTYPE /* obsolescent; will be withdrawn */ +# define YYSTYPE_IS_DECLARED 1 +#endif + + + + diff --git a/storage/innobase/include/fts0priv.h b/storage/innobase/include/fts0priv.h new file mode 100644 index 00000000000..8524f988e47 --- /dev/null +++ b/storage/innobase/include/fts0priv.h @@ -0,0 +1,613 @@ +/***************************************************************************** + +Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/fts0priv.h +Full text search internal header file + +Created 2011/09/02 Sunny Bains +***********************************************************************/ + +#ifndef INNOBASE_FTS0PRIV_H +#define INNOBASE_FTS0PRIV_H + +#include "dict0dict.h" +#include "pars0pars.h" +#include "que0que.h" +#include "que0types.h" +#include "fts0types.h" + +/* The various states of the FTS sub system pertaining to a table with +FTS indexes defined on it. */ +enum fts_table_state_enum { + /* !<This must be 0 since we insert + a hard coded '0' at create time + to the config table */ + + FTS_TABLE_STATE_RUNNING = 0, /*!< Auxiliary tables created OK */ + + FTS_TABLE_STATE_OPTIMIZING, /*!< This is a substate of RUNNING */ + + FTS_TABLE_STATE_DELETED /*!< All aux tables to be dropped when + it's safe to do so */ +}; + +typedef enum fts_table_state_enum fts_table_state_t; + +/** The default time to wait for the background thread (in microsecnds). 
*/ +#define FTS_MAX_BACKGROUND_THREAD_WAIT 10000 + +/** Maximum number of iterations to wait before we complain */ +#define FTS_BACKGROUND_THREAD_WAIT_COUNT 1000 + +/** The maximum length of a config table parameter name in bytes */ +#define FTS_MAX_CONFIG_NAME_LEN 64 + +/** The maximum length of the config table's value column in bytes */ +#define FTS_MAX_CONFIG_VALUE_LEN 1024 + +/** Approx. upper limit of ilist length in bytes. */ +#define FTS_ILIST_MAX_SIZE (64 * 1024) + +/** FTS config table name parameters */ + +/** The number of seconds after which an OPTIMIZE run will stop */ +#define FTS_OPTIMIZE_LIMIT_IN_SECS "optimize_checkpoint_limit" + +/** The next doc id */ +#define FTS_SYNCED_DOC_ID "synced_doc_id" + +/** The last word that was OPTIMIZED */ +#define FTS_LAST_OPTIMIZED_WORD "last_optimized_word" + +/** Total number of documents that have been deleted. The next_doc_id +minus this count gives us the total number of documents. */ +#define FTS_TOTAL_DELETED_COUNT "deleted_doc_count" + +/** Total number of words parsed from all documents */ +#define FTS_TOTAL_WORD_COUNT "total_word_count" + +/** Start of optimize of an FTS index */ +#define FTS_OPTIMIZE_START_TIME "optimize_start_time" + +/** End of optimize for an FTS index */ +#define FTS_OPTIMIZE_END_TIME "optimize_end_time" + +/** User specified stopword table name */ +#define FTS_STOPWORD_TABLE_NAME "stopword_table_name" + +/** Whether to use (turn on/off) stopword */ +#define FTS_USE_STOPWORD "use_stopword" + +/** State of the FTS system for this table. It can be one of + RUNNING, OPTIMIZING, DELETED. */ +#define FTS_TABLE_STATE "table_state" + +/** The minimum length of an FTS auxiliary table name's id component +e.g., For an auxiliary table name + + FTS_<TABLE_ID>_SUFFIX + +This constant is for the minimum length required to store the <TABLE_ID> +component. +*/ +#define FTS_AUX_MIN_TABLE_ID_LENGTH 48 + +/** Maximum length of an integer stored in the config table value column. 
*/ +#define FTS_MAX_INT_LEN 32 + +/******************************************************************//** +Parse an SQL string. %s is replaced with the table's id. +@return query graph */ +UNIV_INTERN +que_t* +fts_parse_sql( +/*==========*/ + fts_table_t* fts_table, /*!< in: FTS aux table */ + pars_info_t* info, /*!< in: info struct, or NULL */ + const char* sql); /*!< in: SQL string to evaluate */ +/******************************************************************//** +Evaluate a parsed SQL statement +@return DB_SUCCESS or error code */ +UNIV_INTERN +ulint +fts_eval_sql( +/*=========*/ + trx_t* trx, /*!< in: transaction */ + que_t* graph); /*!< in: Parsed statement */ +/******************************************************************//** +Construct the name of an ancillary FTS table for the given table. +@return own: table name, must be freed with mem_free() */ +UNIV_INTERN +char* +fts_get_table_name( +/*===============*/ + const fts_table_t* + fts_table); /*!< in: FTS aux table info */ +/******************************************************************//** +Construct the column specification part of the SQL string for selecting the +indexed FTS columns for the given table. Adds the necessary bound +ids to the given 'info' and returns the SQL string. 
Examples: + +One indexed column named "text": + + "$sel0", + info/ids: sel0 -> "text" + +Two indexed columns named "subject" and "content": + + "$sel0, $sel1", + info/ids: sel0 -> "subject", sel1 -> "content", +@return heap-allocated WHERE string */ +UNIV_INTERN +const char* +fts_get_select_columns_str( +/*=======================*/ + dict_index_t* index, /*!< in: FTS index */ + pars_info_t* info, /*!< in/out: parser info */ + mem_heap_t* heap); /*!< in: memory heap */ + +/** define for fts_doc_fetch_by_doc_id() "option" value, defines whether +we want to get Doc whose ID is equal to or greater or smaller than supplied +ID */ +#define FTS_FETCH_DOC_BY_ID_EQUAL 1 +#define FTS_FETCH_DOC_BY_ID_LARGE 2 +#define FTS_FETCH_DOC_BY_ID_SMALL 3 + +/*************************************************************//** +Fetch document (= a single row's indexed text) with the given +document id. +@return: DB_SUCCESS if fetch is successful, else error */ +UNIV_INTERN +ulint +fts_doc_fetch_by_doc_id( +/*====================*/ + fts_get_doc_t* get_doc, /*!< in: state */ + doc_id_t doc_id, /*!< in: id of document to fetch */ + dict_index_t* index_to_use, /*!< in: caller supplied FTS index */ + ulint option, /*!< in: search option, if it is + greater than doc_id or equal */ + fts_sql_callback + callback, /*!< in: callback to read + records */ + void* arg); /*!< in: callback arg */ + +/*******************************************************************//** +Callback function for fetch that stores the text of an FTS document, +converting each column to UTF-16. +@return: always returns NULL */ +UNIV_INTERN +ibool +fts_query_expansion_fetch_doc( +/*==========================*/ + void* row, /*!< in: sel_node_t* */ + void* user_arg); /*!< in: fts_doc_t* */ +/******************************************************************** +Write out a single word's data as new entry/entries in the INDEX table. +@return DB_SUCCESS if all OK. 
*/ +UNIV_INTERN +ulint +fts_write_node( +/*===========*/ + trx_t* trx, /*!< in: transaction */ + que_t** graph, /*!< in: query graph */ + fts_table_t* fts_table, /*!< in: the FTS aux index */ + fts_string_t* word, /*!< in: word in UTF-8 */ + fts_node_t* node); /*!< in: node columns */ +/*******************************************************************//** +Tokenize a document. */ +UNIV_INTERN +void +fts_tokenize_document( +/*==================*/ + fts_doc_t* doc, /*!< in/out: document to + tokenize */ + fts_doc_t* result); /*!< out: if provided, save + result tokens here */ +/*******************************************************************//** +Continue to tokenize a document. */ +UNIV_INTERN +void +fts_tokenize_document_next( +/*=======================*/ + fts_doc_t* doc, /*!< in/out: document to + tokenize */ + ulint add_pos, /*!< in: add this position to all + tokens from this tokenization */ + fts_doc_t* result); /*!< out: if provided, save + result tokens here */ +/******************************************************************//** +Create a new empty document. +@return own: new document */ +UNIV_INTERN +fts_doc_t* +fts_doc_init( +/*=========*/ + fts_doc_t* doc); /*!< in: doc to initialize */ +/******************************************************************//** +Do a binary search for a doc id in the array +@return +ve index if found -ve index where it should be + inserted if not found */ +UNIV_INTERN +int +fts_bsearch( +/*========*/ + fts_update_t* array, /*!< in: array to sort */ + int lower, /*!< in: lower bound of array*/ + int upper, /*!< in: upper bound of array*/ + doc_id_t doc_id); /*!< in: doc id to lookup */ +/******************************************************************//** +Free document. 
*/ +UNIV_INTERN +void +fts_doc_free( +/*=========*/ + fts_doc_t* doc); /*!< in: document */ +/******************************************************************//** +Free fts_word_t instance.*/ + +void +fts_word_free( +/*==========*/ + fts_word_t* word); /*!< in: instance to free.*/ +/******************************************************************//** +Read the rows from the FTS index +@return ulint status code, not a vector (presumably DB_SUCCESS or +error code like the other ulint-returning functions here; TODO confirm) */ +UNIV_INTERN +ulint +fts_index_fetch_nodes( +/*==================*/ + trx_t* trx, /*!< in: transaction */ + que_t** graph, /*!< in: prepared statement */ + fts_table_t* fts_table, /*!< in: FTS aux table */ + const fts_string_t* + word, /*!< in: the word to fetch */ + fts_fetch_t* fetch); /*!< in: fetch callback.*/ +/******************************************************************//** +Create a fts_word_t instance. +@return new instance */ +UNIV_INTERN +fts_word_t* +fts_word_init( +/*==========*/ + fts_word_t* word, /*!< in: word to initialize */ + byte* utf8, /*!< in: UTF-8 string */ + ulint len); /*!< in: length of string in bytes */ +/******************************************************************//** +Compare two fts_trx_table_t instances, we actually compare the +table id's here. +@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ +UNIV_INLINE +int +fts_trx_table_cmp( +/*==============*/ + const void* v1, /*!< in: id1 */ + const void* v2); /*!< in: id2 */ +/******************************************************************//** +Compare a table id with a trx_table_t table id. +@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ +UNIV_INLINE +int +fts_trx_table_id_cmp( +/*=================*/ + const void* p1, /*!< in: id1 */ + const void* p2); /*!< in: id2 */ +/******************************************************************//** +Commit a transaction. 
+@return DB_SUCCESS if all OK */ +UNIV_INTERN +ulint +fts_sql_commit( +/*===========*/ + trx_t* trx); /*!< in: transaction */ +/******************************************************************//** +Rollback a transaction. +@return DB_SUCCESS if all OK */ +UNIV_INTERN +ulint +fts_sql_rollback( +/*=============*/ + trx_t* trx); /*!< in: transaction */ +/******************************************************************//** +Parse an SQL string. %s is replaced with the table's id. Don't acquire +the dict mutex +@return query graph */ +UNIV_INTERN +que_t* +fts_parse_sql_no_dict_lock( +/*=======================*/ + fts_table_t* fts_table, /*!< in: table with FTS index */ + pars_info_t* info, /*!< in: parser info */ + const char* sql); /*!< in: SQL string to evaluate */ +/******************************************************************//** +Get value from config table. The caller must ensure that enough +space is allocated for value to hold the column contents +@return DB_SUCCESS or error code */ +UNIV_INTERN +ulint +fts_config_get_value( +/*=================*/ + trx_t* trx, /* transaction */ + fts_table_t* fts_table, /*!< in: the indexed FTS table */ + const char* name, /*!< in: get config value for + this parameter name */ + fts_string_t* value); /*!< out: value read from + config table */ +/******************************************************************//** +Get value specific to an FTS index from the config table. The caller +must ensure that enough space is allocated for value to hold the +column contents. +@return DB_SUCCESS or error code */ +UNIV_INTERN +ulint +fts_config_get_index_value( +/*=======================*/ + trx_t* trx, /*!< transaction */ + dict_index_t* index, /*!< in: index */ + const char* param, /*!< in: get config value for + this parameter name */ + fts_string_t* value); /*!< out: value read from + config table */ +/******************************************************************//** +Set the value in the config table for name. 
+@return DB_SUCCESS or error code */ +UNIV_INTERN +ulint +fts_config_set_value( +/*=================*/ + trx_t* trx, /*!< transaction */ + fts_table_t* fts_table, /*!< in: the indexed FTS table */ + const char* name, /*!< in: get config value for + this parameter name */ + const fts_string_t* + value); /*!< in: value to update */ +/****************************************************************//** +Set an ulint value in the config table. +@return DB_SUCCESS if all OK else error code */ +UNIV_INTERN +ulint +fts_config_set_ulint( +/*=================*/ + trx_t* trx, /*!< in: transaction */ + fts_table_t* fts_table, /*!< in: the indexed FTS table */ + const char* name, /*!< in: param name */ + ulint int_value); /*!< in: value */ + +/******************************************************************//** +Set the value specific to an FTS index in the config table. +@return DB_SUCCESS or error code */ +UNIV_INTERN +ulint +fts_config_set_index_value( +/*=======================*/ + trx_t* trx, /*!< transaction */ + dict_index_t* index, /*!< in: index */ + const char* param, /*!< in: get config value for + this parameter name */ + fts_string_t* value); /*!< out: value read from + config table */ +/******************************************************************//** +Increment the value in the config table for column name. +@return DB_SUCCESS or error code */ +UNIV_INTERN +ulint +fts_config_increment_value( +/*=======================*/ + trx_t* trx, /*!< transaction */ + fts_table_t* fts_table, /*!< in: the indexed FTS table */ + const char* name, /*!< in: increment config value + for this parameter name */ + ulint delta); /*!< in: increment by this much */ +/******************************************************************//** +Increment the per index value in the config table for column name. 
+@return DB_SUCCESS or error code */ +UNIV_INTERN +ulint +fts_config_increment_index_value( +/*=============================*/ + trx_t* trx, /*!< in: transaction */ + dict_index_t* index, /*!< in: FTS index */ + const char* name, /*!< in: increment config value + for this parameter name */ + ulint delta); /*!< in: increment by this much */ +/******************************************************************//** +Get an ulint value from the config table. +@return DB_SUCCESS or error code */ +UNIV_INTERN +ulint +fts_config_get_index_ulint( +/*=======================*/ + trx_t* trx, /*!< in: transaction */ + dict_index_t* index, /*!< in: FTS index */ + const char* name, /*!< in: param name */ + ulint* int_value); /*!< out: value */ +/******************************************************************//** +Set an ulint value in the config table. +@return DB_SUCCESS or error code */ +UNIV_INTERN +ulint +fts_config_set_index_ulint( +/*=======================*/ + trx_t* trx, /*!< in: transaction */ + dict_index_t* index, /*!< in: FTS index */ + const char* name, /*!< in: param name */ + ulint int_value); /*!< in: value */ +/******************************************************************//** +Get an ulint value from the config table. +@return DB_SUCCESS or error code */ +UNIV_INTERN +ulint +fts_config_get_ulint( +/*=================*/ + trx_t* trx, /*!< in: transaction */ + fts_table_t* fts_table, /*!< in: the indexed FTS table */ + const char* name, /*!< in: param name */ + ulint* int_value); /*!< out: value */ +/******************************************************************//** +Search cache for word. +@return the word node vector if found else NULL */ +UNIV_INTERN +const ib_vector_t* +fts_cache_find_word( +/*================*/ + const fts_index_cache_t* + index_cache, /*!< in: cache to search */ + const fts_string_t* + text); /*!< in: word to search for */ +/******************************************************************//** +Check cache for deleted doc id. 
+@return TRUE if deleted */ +UNIV_INTERN +ibool +fts_cache_is_deleted_doc_id( +/*========================*/ + const fts_cache_t* + cache, /*!< in: cache to search */ + doc_id_t doc_id); /*!< in: doc id to search for */ +/******************************************************************//** +Append deleted doc ids to vector and sort the vector. */ +UNIV_INTERN +void +fts_cache_append_deleted_doc_ids( +/*=============================*/ + const fts_cache_t* + cache, /*!< in: cache to use */ + ib_vector_t* vector); /*!< in: append to this vector */ +/******************************************************************//** +Wait for the background thread to start. We poll to detect change +of state, which is acceptable, since the wait should happen only +once during startup. +@return TRUE if the thread started else FALSE (i.e. timed out) */ +UNIV_INTERN +ibool +fts_wait_for_background_thread_to_start( +/*====================================*/ + dict_table_t* table, /*!< in: table to which the thread + is attached */ + ulint max_wait); /*!< in: time in microseconds, if set + to 0 then it disables timeout + checking */ +/*********************************************************************//** +Get the total number of documents in the FTS. +@return estimated number of rows in the table */ +UNIV_INTERN +ulint +fts_get_total_document_count( +/*=========================*/ + dict_table_t* table); /*!< in: table instance */ +/******************************************************************//** +Get the total number of words in the FTS for a particular FTS index. +@return DB_SUCCESS or error code */ +UNIV_INTERN +ulint +fts_get_total_word_count( +/*=====================*/ + trx_t* trx, /*!< in: transaction */ + dict_index_t* index, /*!< in: for this index */ + ulint* total); /*!< out: total words */ +/******************************************************************//** +Search the index specific cache for a particular FTS index. 
+@return the index specific cache else NULL */ +UNIV_INTERN +const fts_index_cache_t* +fts_find_index_cache( +/*================*/ + const fts_cache_t* + cache, /*!< in: cache to search */ + const dict_index_t* + index); /*!< in: index to search for */ +/******************************************************************//** +Write the table id to the given buffer (including final NUL). Buffer must be +at least FTS_AUX_MIN_TABLE_ID_LENGTH bytes long. +@return number of bytes written */ +UNIV_INLINE +int +fts_write_object_id( +/*================*/ + ib_id_t id, /*!< in: a table/index id */ + char* str); /*!< in: buffer to write the id to */ +/******************************************************************//** +Read the table id from the string generated by fts_write_object_id(). +@return TRUE if parse successful */ +UNIV_INLINE +ibool +fts_read_object_id( +/*===============*/ + ib_id_t* id, /*!< out: a table id */ + const char* str); /*!< in: buffer to read from */ +/******************************************************************//** +Get the table id. +@return number of bytes written */ +UNIV_INTERN +int +fts_get_table_id( +/*=============*/ + const fts_table_t* + fts_table, /*!< in: FTS Auxiliary table */ + char* table_id); /*!< out: table id, must be at least + FTS_AUX_MIN_TABLE_ID_LENGTH bytes + long */ +/******************************************************************//** +Add the table to add to the OPTIMIZER's list. */ +UNIV_INTERN +void +fts_optimize_add_table( +/*===================*/ + dict_table_t* table); /*!< in: table to add */ +/******************************************************************//** +Optimize a table. */ +UNIV_INTERN +void +fts_optimize_do_table( +/*==================*/ + dict_table_t* table); /*!< in: table to optimize */ +/******************************************************************//** +Construct the prefix name of an FTS table. 
+@return own: table name, must be freed with mem_free() */ +UNIV_INTERN +char* +fts_get_table_name_prefix( +/*======================*/ + const fts_table_t* + fts_table); /*!< in: Auxiliary table type */ +/******************************************************************//** +Add node positions. */ +UNIV_INTERN +void +fts_cache_node_add_positions( +/*=========================*/ + fts_cache_t* cache, /*!< in: cache */ + fts_node_t* node, /*!< in: word node */ + doc_id_t doc_id, /*!< in: doc id */ + ib_vector_t* positions); /*!< in: fts_token_t::positions */ + +/******************************************************************//** +Create the config table name for retrieving index specific value. +@return index config parameter name */ +UNIV_INTERN +char* +fts_config_create_index_param_name( +/*===============================*/ + const char* param, /*!< in: base name of param */ + const dict_index_t* index); /*!< in: index for config */ + +#ifndef UNIV_NONINL +#include "fts0priv.ic" +#endif + +#endif /* INNOBASE_FTS0PRIV_H */ diff --git a/storage/innobase/include/fts0priv.ic b/storage/innobase/include/fts0priv.ic new file mode 100644 index 00000000000..716ea4713b5 --- /dev/null +++ b/storage/innobase/include/fts0priv.ic @@ -0,0 +1,59 @@ +/***************************************************************************** + +Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/fts0priv.ic +Full text search internal header file + +Created 2011/11/12 Sunny Bains +***********************************************************************/ + +/******************************************************************//** +Write the table id to the given buffer (including final NUL). Buffer must be +at least FTS_AUX_MIN_TABLE_ID_LENGTH bytes long. +The id is formatted with sprintf(), which is not given the buffer size, so +the caller's buffer length is the only bound on the write. +NOTE(review): under __WIN__ the format is "%016I64u" (decimal) while the +other branch uses PRIx64 (hexadecimal), so the generated text differs by +platform -- confirm this is intended before relying on it. +@return number of bytes written, as returned by sprintf() (the count +excludes the terminating NUL) */ +UNIV_INLINE +int +fts_write_object_id( +/*================*/ + ib_id_t id, /* in: a table/index id */ + char* str) /* out: buffer to write the id to */ +{ +#ifdef __WIN__ +# define UINT64PFx "%016I64u" +#else +# define UINT64PFx "%016"PRIx64 +# endif /* __WIN__ */ + + // FIXME: Use ut_snprintf() + return(sprintf(str, UINT64PFx, id)); +} + +/******************************************************************//** +Read the table id from the string generated by fts_write_object_id(). 
+@return TRUE if parse successful */ +UNIV_INLINE +ibool +fts_read_object_id( +/*===============*/ + ib_id_t* id, /* out: an id */ + const char* str) /* in: buffer to read from */ +{ + /* sscanf() returns the number of values it stored. Only one + output argument (id) is supplied, so a successful parse yields 1; + the previous comparison against 2 could never be true. + NOTE(review): fts_write_object_id() formats with UINT64PFx while + this parses with IB_ID_FMT -- confirm the two formats agree. */ + return(sscanf(str, IB_ID_FMT, id) == 1); +} + diff --git a/storage/innobase/include/fts0tlex.h b/storage/innobase/include/fts0tlex.h new file mode 100644 index 00000000000..c0fed0efa71 --- /dev/null +++ b/storage/innobase/include/fts0tlex.h @@ -0,0 +1,349 @@ +#ifndef fts0tHEADER_H +#define fts0tHEADER_H 1 +#define fts0tIN_HEADER 1 + +#line 6 "../include/fts0tlex.h" + +#line 8 "../include/fts0tlex.h" + +#define YY_INT_ALIGNED short int + +/* A lexical scanner generated by flex */ + +#define FLEX_SCANNER +#define YY_FLEX_MAJOR_VERSION 2 +#define YY_FLEX_MINOR_VERSION 5 +#define YY_FLEX_SUBMINOR_VERSION 35 +#if YY_FLEX_SUBMINOR_VERSION > 0 +#define FLEX_BETA +#endif + +/* First, we deal with platform-specific or compiler-specific issues. */ + +/* begin standard C headers. */ +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <stdlib.h> + +/* end standard C headers. */ + +/* flex integer type definitions */ + +#ifndef FLEXINT_H +#define FLEXINT_H + +/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */ + +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + +/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, + * if you want the limit (max/min) macros for int types. + */ +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS 1 +#endif + +#include <inttypes.h> +typedef int8_t flex_int8_t; +typedef uint8_t flex_uint8_t; +typedef int16_t flex_int16_t; +typedef uint16_t flex_uint16_t; +typedef int32_t flex_int32_t; +typedef uint32_t flex_uint32_t; +#else +typedef signed char flex_int8_t; +typedef short int flex_int16_t; +typedef int flex_int32_t; +typedef unsigned char flex_uint8_t; +typedef unsigned short int flex_uint16_t; +typedef unsigned int flex_uint32_t; + +/* Limits of integral types. 
*/ +#ifndef INT8_MIN +#define INT8_MIN (-128) +#endif +#ifndef INT16_MIN +#define INT16_MIN (-32767-1) +#endif +#ifndef INT32_MIN +#define INT32_MIN (-2147483647-1) +#endif +#ifndef INT8_MAX +#define INT8_MAX (127) +#endif +#ifndef INT16_MAX +#define INT16_MAX (32767) +#endif +#ifndef INT32_MAX +#define INT32_MAX (2147483647) +#endif +#ifndef UINT8_MAX +#define UINT8_MAX (255U) +#endif +#ifndef UINT16_MAX +#define UINT16_MAX (65535U) +#endif +#ifndef UINT32_MAX +#define UINT32_MAX (4294967295U) +#endif + +#endif /* ! C99 */ + +#endif /* ! FLEXINT_H */ + +#ifdef __cplusplus + +/* The "const" storage-class-modifier is valid. */ +#define YY_USE_CONST + +#else /* ! __cplusplus */ + +/* C99 requires __STDC__ to be defined as 1. */ +#if defined (__STDC__) + +#define YY_USE_CONST + +#endif /* defined (__STDC__) */ +#endif /* ! __cplusplus */ + +#ifdef YY_USE_CONST +#define yyconst const +#else +#define yyconst +#endif + +/* An opaque pointer. */ +#ifndef YY_TYPEDEF_YY_SCANNER_T +#define YY_TYPEDEF_YY_SCANNER_T +typedef void* yyscan_t; +#endif + +/* For convenience, these vars (plus the bison vars far below) + are macros in the reentrant scanner. */ +#define yyin yyg->yyin_r +#define yyout yyg->yyout_r +#define yyextra yyg->yyextra_r +#define yyleng yyg->yyleng_r +#define yytext yyg->yytext_r +#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno) +#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column) +#define yy_flex_debug yyg->yy_flex_debug_r + +/* Size of default input buffer. */ +#ifndef YY_BUF_SIZE +#ifdef __ia64__ +/* On IA-64, the buffer size is 16k, not 8k. + * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case. + * Ditto for the __ia64__ case accordingly. 
+ */ +#define YY_BUF_SIZE 32768 +#else +#define YY_BUF_SIZE 16384 +#endif /* __ia64__ */ +#endif + +#ifndef YY_TYPEDEF_YY_BUFFER_STATE +#define YY_TYPEDEF_YY_BUFFER_STATE +typedef struct yy_buffer_state *YY_BUFFER_STATE; +#endif + +#ifndef YY_TYPEDEF_YY_SIZE_T +#define YY_TYPEDEF_YY_SIZE_T +typedef size_t yy_size_t; +#endif + +#ifndef YY_STRUCT_YY_BUFFER_STATE +#define YY_STRUCT_YY_BUFFER_STATE +struct yy_buffer_state + { + FILE *yy_input_file; + + char *yy_ch_buf; /* input buffer */ + char *yy_buf_pos; /* current position in input buffer */ + + /* Size of input buffer in bytes, not including room for EOB + * characters. + */ + yy_size_t yy_buf_size; + + /* Number of characters read into yy_ch_buf, not including EOB + * characters. + */ + int yy_n_chars; + + /* Whether we "own" the buffer - i.e., we know we created it, + * and can realloc() it to grow it, and should free() it to + * delete it. + */ + int yy_is_our_buffer; + + /* Whether this is an "interactive" input source; if so, and + * if we're using stdio for input, then we want to use getc() + * instead of fread(), to make sure we stop fetching input after + * each newline. + */ + int yy_is_interactive; + + /* Whether we're considered to be at the beginning of a line. + * If so, '^' rules will be active on the next match, otherwise + * not. + */ + int yy_at_bol; + + int yy_bs_lineno; /**< The line count. */ + int yy_bs_column; /**< The column count. */ + + /* Whether to try to fill the input buffer when we reach the + * end of it. 
+ */ + int yy_fill_buffer; + + int yy_buffer_status; + + }; +#endif /* !YY_STRUCT_YY_BUFFER_STATE */ + +void fts0trestart (FILE *input_file ,yyscan_t yyscanner ); +void fts0t_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner ); +YY_BUFFER_STATE fts0t_create_buffer (FILE *file,int size ,yyscan_t yyscanner ); +void fts0t_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner ); +void fts0t_flush_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner ); +void fts0tpush_buffer_state (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner ); +void fts0tpop_buffer_state (yyscan_t yyscanner ); + +YY_BUFFER_STATE fts0t_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner ); +YY_BUFFER_STATE fts0t_scan_string (yyconst char *yy_str ,yyscan_t yyscanner ); +YY_BUFFER_STATE fts0t_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner ); + +void *fts0talloc (yy_size_t ,yyscan_t yyscanner ); +void *fts0trealloc (void *,yy_size_t ,yyscan_t yyscanner ); +void fts0tfree (void * ,yyscan_t yyscanner ); + +/* Begin user sect3 */ + +#define fts0twrap(n) 1 +#define YY_SKIP_YYWRAP + +#define yytext_ptr yytext_r + +#ifdef YY_HEADER_EXPORT_START_CONDITIONS +#define INITIAL 0 + +#endif + +#ifndef YY_NO_UNISTD_H +/* Special case for "unistd.h", since it is non-ANSI. We include it way + * down here because we want the user's section 1 to have been scanned first. + * The user has a chance to override it with an option. + */ +#include <unistd.h> +#endif + +#ifndef YY_EXTRA_TYPE +#define YY_EXTRA_TYPE void * +#endif + +int fts0tlex_init (yyscan_t* scanner); + +int fts0tlex_init_extra (YY_EXTRA_TYPE user_defined,yyscan_t* scanner); + +/* Accessor methods to globals. + These are made visible to non-reentrant scanners for convenience. 
*/ + +int fts0tlex_destroy (yyscan_t yyscanner ); + +int fts0tget_debug (yyscan_t yyscanner ); + +void fts0tset_debug (int debug_flag ,yyscan_t yyscanner ); + +YY_EXTRA_TYPE fts0tget_extra (yyscan_t yyscanner ); + +void fts0tset_extra (YY_EXTRA_TYPE user_defined ,yyscan_t yyscanner ); + +FILE *fts0tget_in (yyscan_t yyscanner ); + +void fts0tset_in (FILE * in_str ,yyscan_t yyscanner ); + +FILE *fts0tget_out (yyscan_t yyscanner ); + +void fts0tset_out (FILE * out_str ,yyscan_t yyscanner ); + +int fts0tget_leng (yyscan_t yyscanner ); + +char *fts0tget_text (yyscan_t yyscanner ); + +int fts0tget_lineno (yyscan_t yyscanner ); + +void fts0tset_lineno (int line_number ,yyscan_t yyscanner ); + +/* Macros after this point can all be overridden by user definitions in + * section 1. + */ + +#ifndef YY_SKIP_YYWRAP +#ifdef __cplusplus +extern "C" int fts0twrap (yyscan_t yyscanner ); +#else +extern int fts0twrap (yyscan_t yyscanner ); +#endif +#endif + +#ifndef yytext_ptr +static void yy_flex_strncpy (char *,yyconst char *,int ,yyscan_t yyscanner); +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen (yyconst char * ,yyscan_t yyscanner); +#endif + +#ifndef YY_NO_INPUT + +#endif + +/* Amount of stuff to slurp up with each read. */ +#ifndef YY_READ_BUF_SIZE +#ifdef __ia64__ +/* On IA-64, the buffer size is 16k, not 8k */ +#define YY_READ_BUF_SIZE 16384 +#else +#define YY_READ_BUF_SIZE 8192 +#endif /* __ia64__ */ +#endif + +/* Number of entries by which start-condition stack grows. */ +#ifndef YY_START_STACK_INCR +#define YY_START_STACK_INCR 25 +#endif + +/* Default declaration of generated scanner - a define so the user can + * easily add parameters. 
+ */ +#ifndef YY_DECL +#define YY_DECL_IS_OURS 1 + +extern int fts0tlex (yyscan_t yyscanner); + +#define YY_DECL int fts0tlex (yyscan_t yyscanner) +#endif /* !YY_DECL */ + +/* yy_get_previous_state - get the state just before the EOB char was reached */ + +#undef YY_NEW_FILE +#undef YY_FLUSH_BUFFER +#undef yy_set_bol +#undef yy_new_buffer +#undef yy_set_interactive +#undef YY_DO_BEFORE_ACTION + +#ifdef YY_DECL_IS_OURS +#undef YY_DECL_IS_OURS +#undef YY_DECL +#endif + +#line 68 "fts0tlex.l" + + +#line 348 "../include/fts0tlex.h" +#undef fts0tIN_HEADER +#endif /* fts0tHEADER_H */ diff --git a/storage/innobase/include/fts0types.h b/storage/innobase/include/fts0types.h new file mode 100644 index 00000000000..5b28f2c9473 --- /dev/null +++ b/storage/innobase/include/fts0types.h @@ -0,0 +1,481 @@ +/***************************************************************************** + +Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/fts0types.h +Full text search types file + +Created 2007-03-27 Sunny Bains +*******************************************************/ + +#ifndef INNOBASE_FTS0TYPES_H +#define INNOBASE_FTS0TYPES_H + +#include "que0types.h" +#include "ut0byte.h" +#include "fut0fut.h" +#include "ut0rbt.h" +#include "fts0fts.h" + +/** Types (aliases) used within FTS. */ +typedef struct fts_que_struct fts_que_t; +typedef struct fts_node_struct fts_node_t; +typedef struct fts_word_struct fts_word_t; +typedef struct fts_fetch_struct fts_fetch_t; +typedef struct fts_update_struct fts_update_t; +typedef struct fts_get_doc_struct fts_get_doc_t; +typedef struct fts_utf8_str_struct fts_utf8_str_t; +typedef struct fts_doc_stats_struct fts_doc_stats_t; +typedef struct fts_tokenizer_word_struct fts_tokenizer_word_t; +typedef struct fts_index_selector_struct fts_index_selector_t; + +/** Callbacks used within FTS. */ +typedef pars_user_func_cb_t fts_sql_callback; +typedef void (*fts_filter)(void*, fts_node_t*, void*, ulint len); + +/** Statistics relevant to a particular document, used during retrieval. */ +struct fts_doc_stats_struct { + doc_id_t doc_id; /*!< Document id */ + ulint word_count; /*!< Total words in the document */ +}; + +/** Its main purpose is to store the SQL prepared statements that +are required to retrieve a document from the database. */ +struct fts_get_doc_struct { + fts_index_cache_t* + index_cache; /*!< The index cache instance */ + + /*!< Parsed sql statement */ + que_t* get_document_graph; +}; + +/** Since we can have multiple FTS indexes on a table, we keep a +per index cache of words etc. 
*/ +struct fts_index_cache_struct { + dict_index_t* index; /*!< The FTS index instance */ + + ib_rbt_t* words; /*!< Nodes; indexed by fts_string_t*, + cells are fts_tokenizer_word_t*.*/ + + ib_vector_t* doc_stats; /*!< Array of the fts_doc_stats_t + contained in the memory buffer. + Must be in sorted order (ascending). + The ideal choice is an rb tree but + the rb tree imposes a space overhead + that we can do without */ + + que_t** ins_graph; /*!< Insert query graphs */ + + que_t** sel_graph; /*!< Select query graphs */ + CHARSET_INFO* charset; /*!< charset */ +}; + +/** For supporting the tracking of updates on multiple FTS indexes we need +to track which FTS indexes need to be updated. For INSERT and DELETE we +update all fts indexes. */ +struct fts_update_struct { + doc_id_t doc_id; /*!< The doc id affected */ + + ib_vector_t* fts_indexes; /*!< The FTS indexes that need to be + updated. A NULL value means all + indexes need to be updated. This + vector is not allocated on the heap + and so must be freed explicitly, + when we are done with it */ +}; + +/** Stop word control information. */ +struct fts_stopword_struct { + ulint status; /*!< Status of the stopword tree */ + ib_alloc_t* heap; /*!< The memory allocator to use */ + ib_rbt_t* cached_stopword;/*!< This stores all active stopwords */ + CHARSET_INFO* charset; /*!< charset for stopword */ +}; + +/** The SYNC state of the cache. There is one instance of this struct +associated with each ADD thread. 
*/ +struct fts_sync_struct { + trx_t* trx; /*!< The transaction used for SYNCing + the cache to disk */ + dict_table_t* table; /*!< Table with FTS index(es) */ + ulint max_cache_size; /*!< Max size in bytes of the cache */ + ibool cache_full; /*!< flag, when true it indicates that + we need to sync the cache to disk */ + ulint lower_index; /*!< the start index of the doc id + vector from where to start adding + documents to the FTS cache */ + ulint upper_index; /*!< max index of the doc id vector to + add to the FTS cache */ + ibool interrupted; /*!< TRUE if SYNC was interrupted */ + doc_id_t min_doc_id; /*!< The smallest doc id added to the + cache. It should be equal to + doc_ids[lower_index] */ + doc_id_t max_doc_id; /*!< The doc id at which the cache was + noted as being full, we use this to + set the upper_index field */ + ib_time_t start_time; /*!< SYNC start time */ +}; + +typedef struct fts_sync_struct fts_sync_t; + +/** The cache for the FTS system. It is a memory-based inverted index +that new entries are added to, until it grows over the configured maximum +size, at which time its contents are written to the INDEX table. */ +struct fts_cache_struct { + rw_lock_t lock; /*!< lock protecting all access to the + memory buffer. FIXME: this needs to + be our new upgrade-capable rw-lock */ + + rw_lock_t init_lock; /*!< lock used for the cache + initialization, it has a different + SYNC level from the cache lock above */ + + mutex_t optimize_lock; /*!< Lock for OPTIMIZE */ + + mutex_t deleted_lock; /*!< Lock covering deleted_doc_ids */ + + mutex_t doc_id_lock; /*!< Lock covering Doc ID */ + + ib_vector_t* deleted_doc_ids;/*!< Array of deleted doc ids, each + element is of type fts_update_t */ + + ib_vector_t* indexes; /*!< We store the stats and inverted + index for the individual FTS indexes + in this vector. Each element is + an instance of fts_index_cache_t */ + + ib_vector_t* get_docs; /*!< information required to read + the document from the table. 
Each + element is of type fts_doc_t */ + + ulint total_size; /*!< total size consumed by the ilist + field of all nodes. SYNC is run + whenever this gets too big */ + fts_sync_t* sync; /*!< sync structure to sync data to + disk */ + ib_alloc_t* sync_heap; /*!< The heap allocator, for indexes + and deleted_doc_ids, ie. transient + objects, they are recreated after + a SYNC is completed */ + + + ib_alloc_t* self_heap; /*!< This heap is the heap out of + which an instance of the cache itself + was created. Objects created using + this heap will last for the lifetime + of the cache */ + + doc_id_t next_doc_id; /*!< Next doc id */ + + doc_id_t synced_doc_id; /*!< Doc ID sync-ed to CONFIG table */ + + doc_id_t first_doc_id; /*!< first doc id since this table + was opened */ + + ulint deleted; /*!< Number of doc ids deleted since + last optimized. This variable is + covered by deleted_lock */ + + ulint added; /*!< Number of doc ids added since last + optimized. This variable is covered by + the deleted lock */ + + fts_stopword_t stopword_info; /*!< Cached stopwords for the FTS */ + mem_heap_t* cache_heap; /*!< Cache Heap */ +}; + +/** Columns of the FTS auxiliary INDEX table */ +struct fts_node_struct { + doc_id_t first_doc_id; /*!< First document id in ilist. */ + + doc_id_t last_doc_id; /*!< Last document id in ilist. */ + + byte* ilist; /*!< Binary list of documents & word + positions the token appears in. + TODO: For now, these are simply + ut_malloc'd, but if testing shows + that they waste memory unacceptably, a + special memory allocator will have + to be written */ + + ulint doc_count; /*!< Number of doc ids in ilist */ + + ulint ilist_size; /*!< Used size of ilist in bytes. */ + + ulint ilist_size_alloc; + /*!< Allocated size of ilist in + bytes */ +}; + +/** A tokenizer word. Contains information about one word. */ +struct fts_tokenizer_word_struct { + fts_string_t text; /*!< Token text. 
*/ + + ib_vector_t* nodes; /*!< Word node ilists, each element is + of type fts_node_t */ +}; + +/** Word text plus its array of nodes as on disk in FTS index */ +struct fts_word_struct { + fts_string_t text; /*!< Word value in UTF-8 */ + ib_vector_t* nodes; /*!< Nodes read from disk */ + + ib_alloc_t* heap_alloc; /*!< For handling all allocations */ +}; + +/** Callback for reading and filtering nodes that are read from FTS index */ +struct fts_fetch_struct { + void* read_arg; /*!< Arg for the sql_callback */ + + fts_sql_callback + read_record; /*!< Callback for reading index + record */ +}; + +/** For horizontally splitting an FTS auxiliary index */ +struct fts_index_selector_struct { + ulint value; /*!< Character value at which + to split */ + + const char* suffix; /*!< FTS aux index suffix */ +}; + +/** This type represents a single document. */ +struct fts_doc_struct { + fts_string_t text; /*!< document text */ + + ibool found; /*!< TRUE if the document was found + successfully in the database */ + + ib_rbt_t* tokens; /*!< This is filled when the document + is tokenized. Tokens; indexed by + fts_string_t*, cells are of type + fts_token_t* */ + + ib_alloc_t* self_heap; /*!< An instance of this type is + allocated from this heap along + with any objects that have the + same lifespan, most notably + the vector of token positions */ + CHARSET_INFO* charset; /*!< Document's charset info */ +}; + +/** A token and its positions within a document. */ +struct fts_token_struct { + fts_string_t text; /*!< token text */ + + ib_vector_t* positions; /*!< an array of the positions the + token is found in; each item is + actually an ulint. */ +}; + +/** It's defined in fts/fts0fts.c; the inline accessors in this module +scan it up to a terminating entry whose value is 0 */ +extern const fts_index_selector_t fts_index_selector[]; + +/******************************************************************//** +Compare two UTF-8 strings. 
*/ +UNIV_INLINE +int +fts_utf8_string_cmp( +/*================*/ + /*!< out: + < 0 if n1 < n2, + 0 if n1 == n2, + > 0 if n1 > n2 */ + const void* p1, /*!< in: key */ + const void* p2); /*!< in: node */ + +/******************************************************************//** +Compare two UTF-8 strings, and return match (0) if +passed in "key" value equals or is the prefix of the "node" value. */ +UNIV_INLINE +int +fts_utf8_string_cmp_prefix( +/*=======================*/ + /*!< out: + < 0 if n1 < n2, + 0 if n1 == n2, + > 0 if n1 > n2 */ + const void* p1, /*!< in: key */ + const void* p2); /*!< in: node */ + +/******************************************************************//** +Compare two fts_trx_row_t instances doc_ids. */ +UNIV_INLINE +int +fts_trx_row_doc_id_cmp( +/*===================*/ + /*!< out: + < 0 if n1 < n2, + 0 if n1 == n2, + > 0 if n1 > n2 */ + const void* p1, /*!< in: id1 */ + const void* p2); /*!< in: id2 */ + +/******************************************************************//** +Compare two fts_ranking_t instances doc_ids. */ +UNIV_INLINE +int +fts_ranking_doc_id_cmp( +/*===================*/ + /*!< out: + < 0 if n1 < n2, + 0 if n1 == n2, + > 0 if n1 > n2 */ + const void* p1, /*!< in: id1 */ + const void* p2); /*!< in: id2 */ + +/******************************************************************//** +Compare two fts_update_t instances doc_ids. 
*/ +UNIV_INLINE +int +fts_update_doc_id_cmp( +/*==================*/ + /*!< out: + < 0 if n1 < n2, + 0 if n1 == n2, + > 0 if n1 > n2 */ + const void* p1, /*!< in: id1 */ + const void* p2); /*!< in: id2 */ + +/******************************************************************//** +Decode and return the integer that was encoded using our VLC scheme.*/ +UNIV_INLINE +ulint +fts_decode_vlc( +/*===========*/ + /*!< out: value decoded */ + byte** ptr); /*!< in/out: ptr to decode from, this ptr is + incremented by the number of bytes decoded */ + +/******************************************************************//** +Duplicate an UTF-8 string. */ +UNIV_INLINE +void +fts_utf8_string_dup( +/*================*/ + /*!< no return value: + dst->f_str is set to a + NUL-terminated copy of + src->f_str made on heap */ + fts_string_t* dst, /*!< out: dup to here */ + const fts_string_t* src, /*!< in: src string */ + mem_heap_t* heap); /*!< in: heap to use */ + +/******************************************************************//** +Return length of val if it were encoded using our VLC scheme. */ +UNIV_INLINE +ulint +fts_get_encoded_len( +/*================*/ + /*!< out: length of value + encoded, in bytes */ + ulint val); /*!< in: value to encode */ + +/******************************************************************//** +Encode an integer using our VLC scheme and return the length in bytes. */ +UNIV_INLINE +ulint +fts_encode_int( +/*===========*/ + /*!< out: length of value + encoded, in bytes */ + ulint val, /*!< in: value to encode */ + byte* buf); /*!< out: buffer, must have + enough space */ + +/******************************************************************//** +Decode a UTF-8 character. 
+ +http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf: + + Scalar Value 1st Byte 2nd Byte 3rd Byte 4th Byte +00000000 0xxxxxxx 0xxxxxxx +00000yyy yyxxxxxx 110yyyyy 10xxxxxx +zzzzyyyy yyxxxxxx 1110zzzz 10yyyyyy 10xxxxxx +000uuuzz zzzzyyyy yyxxxxxx 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx + +This function decodes UTF-8 sequences up to 6 bytes (31 bits). + +On error *ptr will point to the first byte that was not correctly +decoded. This will hopefully help in resyncing the input. */ +UNIV_INLINE +ulint +fts_utf8_decode( +/*============*/ + /*!< out: UTF8_ERROR if *ptr + did not point to a valid + UTF-8 sequence, or the + Unicode code point. */ + const byte** ptr); /*!< in/out: pointer to + UTF-8 string. The + pointer is advanced to + the start of the next + character. */ + +/******************************************************************//** +Lowercase an UTF-8 string. */ +UNIV_INLINE +void +fts_utf8_tolower( +/*=============*/ + fts_string_t* str); /*!< in: string */ + +/******************************************************************//** +Get the selected FTS aux INDEX suffix. */ +UNIV_INLINE +const char* +fts_get_suffix( +/*===========*/ + ulint selected); /*!< in: selected index */ + +/******************************************************************** +Get the number of index selectors. */ +UNIV_INLINE +ulint +fts_get_n_selectors(void); +/*=====================*/ + +/******************************************************************//** +Select the FTS auxiliary index for the given string. +@return the index to use for the string */ +UNIV_INLINE +ulint +fts_select_index( +/*=============*/ + const CHARSET_INFO* cs, /*!< Charset */ + const byte* str, /*!< in: word string */ + ulint len); /*!< in: string length */ + +/******************************************************************** +Select the next FTS auxiliary index for the given character. 
+@return the next index to use for character */ +UNIV_INLINE +ulint +fts_select_next_index( +/*==================*/ + const CHARSET_INFO* cs, /*!< Charset */ + const byte* str, /*!< in: string */ + ulint len); /*!< in: string length */ + +#ifndef UNIV_NONINL +#include "fts0types.ic" +#include "fts0vlc.ic" +#endif + +#endif /* INNOBASE_FTS0TYPES_H */ diff --git a/storage/innobase/include/fts0types.ic b/storage/innobase/include/fts0types.ic new file mode 100644 index 00000000000..2734a331a86 --- /dev/null +++ b/storage/innobase/include/fts0types.ic @@ -0,0 +1,427 @@ +/***************************************************************************** + +Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/fts0types.ic +Full text search types. + +Created 2007-03-27 Sunny Bains +*******************************************************/ + +#ifndef INNOBASE_FTS0TYPES_IC +#define INNOBASE_FTS0TYPES_IC + +#include <ctype.h> + +#include "rem0cmp.h" +#include "ha_prototypes.h" + +extern const ulint UTF8_ERROR; + +/* Determine if a UTF-8 continuation byte is valid. 
*/ +#define fts_utf8_is_valid(b) (((b) & 0xC0) == 0x80) + +/******************************************************************//** +Compare two fts_trx_table_t instances by the id of the table they refer to. +@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ +UNIV_INLINE +int +fts_trx_table_cmp( +/*==============*/ + const void* p1, /*!< in: pointer to a fts_trx_table_t* */ + const void* p2) /*!< in: pointer to a fts_trx_table_t* */ +{ + const dict_table_t* table1 = (*(const fts_trx_table_t**) p1)->table; + const dict_table_t* table2 = (*(const fts_trx_table_t**) p2)->table; + + return((table1->id > table2->id) + ? 1 + : (table1->id == table2->id) + ? 0 + : -1); +} + +/******************************************************************//** +Compare a table id with a fts_trx_table_t table id. +@return < 0 if n1 < n2, 0 if n1 == n2,> 0 if n1 > n2 */ +UNIV_INLINE +int +fts_trx_table_id_cmp( +/*=================*/ + const void* p1, /*!< in: pointer to the table id (ullint) */ + const void* p2) /*!< in: pointer to a fts_trx_table_t* */ +{ + const ullint* table_id = (const ullint*) p1; + const dict_table_t* table2 = (*(const fts_trx_table_t**) p2)->table; + + return((*table_id > table2->id) + ? 1 + : (*table_id == table2->id) + ? 0 + : -1); +} + +/******************************************************************//** +Duplicate an UTF-8 string: dst receives a copy of src made on heap, with a +terminating NUL written at dst->f_str[src->f_len]. No value is returned. +NOTE(review): f_len + 1 bytes are handed to mem_heap_dup, so src->f_str +presumably must have that many readable bytes -- confirm against callers. */ +UNIV_INLINE +void +fts_utf8_string_dup( +/*================*/ + fts_string_t* dst, /*!< out: dup to here */ + const fts_string_t* src, /*!< in: src string */ + mem_heap_t* heap) /*!< in: heap to use */ +{ + dst->f_str = (byte*) mem_heap_dup(heap, src->f_str, src->f_len + 1); + + dst->f_len = src->f_len; + dst->f_str[src->f_len] = 0; + dst->f_n_char = src->f_n_char; +} + +/******************************************************************//** +Compare two fts_trx_row_t doc_ids. 
+@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ +UNIV_INLINE +int +fts_trx_row_doc_id_cmp( +/*===================*/ + const void* p1, /*!< in: pointer to a fts_trx_row_t */ + const void* p2) /*!< in: pointer to a fts_trx_row_t */ +{ + const fts_trx_row_t* tr1 = (const fts_trx_row_t*) p1; + const fts_trx_row_t* tr2 = (const fts_trx_row_t*) p2; + + /* Compare instead of subtracting: (int)(a - b) drops the high bits + of the difference whenever it does not fit in an int, which can + report unequal doc ids as equal or flip the sign. */ + return((tr1->doc_id > tr2->doc_id) + ? 1 + : (tr1->doc_id == tr2->doc_id) + ? 0 + : -1); +} + +/******************************************************************//** +Compare two fts_ranking_t doc_ids. +@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ +UNIV_INLINE +int +fts_ranking_doc_id_cmp( +/*===================*/ + const void* p1, /*!< in: pointer to a fts_ranking_t */ + const void* p2) /*!< in: pointer to a fts_ranking_t */ +{ + const fts_ranking_t* rk1 = (const fts_ranking_t*) p1; + const fts_ranking_t* rk2 = (const fts_ranking_t*) p2; + + /* Three-way compare; a truncated difference could give the wrong + sign or a false "equal". */ + return((rk1->doc_id > rk2->doc_id) + ? 1 + : (rk1->doc_id == rk2->doc_id) + ? 0 + : -1); +} + +/******************************************************************//** +Compare two fts_update_t doc_ids. +@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ +UNIV_INLINE +int +fts_update_doc_id_cmp( +/*==================*/ + const void* p1, /*!< in: pointer to a fts_update_t */ + const void* p2) /*!< in: pointer to a fts_update_t */ +{ + const fts_update_t* up1 = (const fts_update_t*) p1; + const fts_update_t* up2 = (const fts_update_t*) p2; + + /* Three-way compare; a truncated difference could give the wrong + sign or a false "equal". */ + return((up1->doc_id > up2->doc_id) + ? 1 + : (up1->doc_id == up2->doc_id) + ? 0 + : -1); +} + + +/******************************************************************//** +Lowercase an UTF-8 string. */ +UNIV_INLINE +void +fts_utf8_tolower( +/*=============*/ + fts_string_t* str) /*!< in: string */ +{ + innobase_casedn_str((char*) str->f_str); +} + +/******************************************************************//** +Compare two UTF-8 strings. 
+@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ +UNIV_INLINE +int +fts_utf8_string_cmp( +/*================*/ + const void* p1, /*!< in: key */ + const void* p2) /*!< in: node */ +{ + const fts_string_t* s1 = (const fts_string_t*) p1; + const fts_string_t* s2 = (const fts_string_t*) p2; + + return(cmp_data_data_slow_varchar( + s1->f_str, s1->f_len, s2->f_str, s2->f_len)); +} + +/******************************************************************//** +Compare two UTF-8 strings, and return match (0) if +passed in "key" value equals or is the prefix of the "node" value. +@return the nonzero result of comparing the common leading part if it +differs; otherwise 1 if the key is longer than the node, else 0 */ +UNIV_INLINE +int +fts_utf8_string_cmp_prefix( +/*=======================*/ + const void* p1, /*!< in: key */ + const void* p2) /*!< in: node */ +{ + int result; + ulint len; + + const fts_string_t* s1 = (const fts_string_t*) p1; + const fts_string_t* s2 = (const fts_string_t*) p2; + + /* The shorter of the two lengths is passed for both operands, so + only the common leading part is compared. */ + len = ut_min(s1->f_len, s2->f_len); + + result = cmp_data_data_slow_varchar(s1->f_str, len, s2->f_str, len); + + if (result) { + return(result); + } + + /* The common part is equal: a key longer than the node cannot be + its prefix, so it is reported as greater. */ + if (s1->f_len > s2->f_len) { + return(1); + } + + return(0); +} + +/******************************************************************//** +Decode a UTF-8 character. + +http://www.unicode.org/versions/Unicode4.0.0/ch03.pdf: + + Scalar Value 1st Byte 2nd Byte 3rd Byte 4th Byte +00000000 0xxxxxxx 0xxxxxxx +00000yyy yyxxxxxx 110yyyyy 10xxxxxx +zzzzyyyy yyxxxxxx 1110zzzz 10yyyyyy 10xxxxxx +000uuuzz zzzzyyyy yyxxxxxx 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx + +This function decodes UTF-8 sequences up to 6 bytes (31 bits). + +On error *ptr will point to the first byte that was not correctly +decoded. This will hopefully help in resyncing the input. +@return UTF8_ERROR if *ptr did not point to a valid +UTF-8 sequence, or the Unicode code point. */ +UNIV_INLINE +ulint +fts_utf8_decode( +/*============*/ + const byte** ptr) /*!< in/out: pointer to + UTF-8 string. 
The + pointer is advanced to + the start of the next + character. */ +{ + const byte* p = *ptr; + ulint ch = *p++; +#ifdef UNIV_DEBUG + ulint min_ch; +#endif /* UNIV_DEBUG */ + + if (UNIV_LIKELY(ch < 0x80)) { + /* 0xxxxxxx */ + } else if (UNIV_UNLIKELY(ch < 0xC0)) { + /* A continuation byte cannot start a code. */ + goto err_exit; + } else if (ch < 0xE0) { + /* 110yyyyy 10xxxxxx */ + ch &= 0x1F; + ut_d(min_ch = 0x80); + goto get1; + } else if (ch < 0xF0) { + /* 1110zzzz 10yyyyyy 10xxxxxx */ + ch &= 0x0F; + ut_d(min_ch = 0x800); + goto get2; + } else if (ch < 0xF8) { + /* 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx */ + ch &= 0x07; + ut_d(min_ch = 0x10000); + goto get3; + } else if (ch < 0xFC) { + /* 111110tt 10uuuuuu 10zzzzzz 10yyyyyy 10xxxxxx */ + ch &= 0x03; + ut_d(min_ch = 0x200000); + goto get4; + } else if (ch < 0xFE) { + /* 1111110s 10tttttt 10uuuuuu 10zzzzzz 10yyyyyy 10xxxxxx */ + /* The lead byte is not masked in this branch: after the five + 6-bit shifts below only its two lowest bits land at or below + bit 31; the higher ones are cleared by the 0xFFFFFFFF mask + (or are shifted out of a 32-bit ulint). */ + ut_d(min_ch = 0x4000000); + if (!fts_utf8_is_valid(*p)) { + goto err_exit; + } + ch <<= 6; + ch |= (*p++) & 0x3F; + /* get4..get1 fall through: each step checks one continuation + byte and folds its low 6 bits into ch. */ +get4: + if (!fts_utf8_is_valid(*p)) { + goto err_exit; + } + ch <<= 6; + ch |= (*p++) & 0x3F; +get3: + if (!fts_utf8_is_valid(*p)) { + goto err_exit; + } + ch <<= 6; + ch |= (*p++) & 0x3F; +get2: + if (!fts_utf8_is_valid(*p)) { + goto err_exit; + } + ch <<= 6; + ch |= (*p++) & 0x3F; +get1: + if (!fts_utf8_is_valid(*p)) { + goto err_exit; + } + ch <<= 6; + ch |= (*p++) & 0x3F; + + /* The following is needed in the 6-byte case + when ulint is wider than 32 bits. */ + ch &= 0xFFFFFFFF; + + /* The code positions U+D800 to U+DFFF (UTF-16 surrogate pairs) + and U+FFFE and U+FFFF cannot occur in valid UTF-8. 
*/ + + /* Overlong encodings (ch < min_ch) are rejected only when + UNIV_DEBUG is defined. */ + if ( (ch >= 0xD800 && ch <= 0xDFFF) +#ifdef UNIV_DEBUG + || ch < min_ch +#endif /* UNIV_DEBUG */ + || ch == 0xFFFE || ch == 0xFFFF) { + + ch = UTF8_ERROR; + } + } else { +err_exit: + ch = UTF8_ERROR; + } + + *ptr = p; + + return(ch); +} + +/******************************************************************//** +Get the first character's code position for FTS index partition */ +extern +ulint +innobase_strnxfrm( +/*==============*/ + const CHARSET_INFO* cs, /*!< in: Character set */ + const uchar* p2, /*!< in: string */ + const ulint len2); /*!< in: string length */ + +/******************************************************************//** +Select the FTS auxiliary index for the given character. +@return the index to use for the string: the entry whose value equals the +one computed by innobase_strnxfrm(), else the entry before the first one +with a greater value, else the last entry */ +UNIV_INLINE +ulint +fts_select_index( +/*=============*/ + const CHARSET_INFO* cs, /*!< in: Charset */ + const byte* str, /*!< in: string */ + ulint len) /*!< in: string length */ +{ + ulint selected = 0; + ulint value = innobase_strnxfrm(cs, str, len); + + /* Scan up to the terminating entry, whose value is 0. The early + return below presumably relies on the values being in ascending + order -- confirm against the table definition. */ + while (fts_index_selector[selected].value != 0) { + + if (fts_index_selector[selected].value == value) { + + return(selected); + + } else if (fts_index_selector[selected].value > value) { + + /* First entry above value: use the preceding + entry, or entry 0 if there is none. */ + return(selected > 0 ? selected - 1 : 0); + } + + ++selected; + } + + ut_ad(selected > 1); + + /* value is above every entry: use the last one. */ + return(selected - 1); +} + +/******************************************************************//** +Select the next FTS auxiliary index for the given character. 
+@return the next index to use for character */ +UNIV_INLINE +ulint +fts_select_next_index( +/*==================*/ + const CHARSET_INFO* cs, /*!< in: Charset */ + const byte* str, /*!< in: string */ + ulint len) /*!< in: string length */ +{ + ulint selected = 0; + ulint value = innobase_strnxfrm(cs, str, len); + + while (fts_index_selector[selected].value != 0) { + + if (fts_index_selector[selected].value == value) { + + /* Exact match: the next index is the one after it. */ + return(selected + 1); + + } else if (fts_index_selector[selected].value > value) { + + /* First entry above value is itself the next index. */ + return(selected); + } + + ++selected; + } + + ut_ad(selected > 0); + + /* No entry is above value: the result is the position of the + terminating entry, i.e. the number of selectors. */ + return((ulint) selected); +} + +/******************************************************************//** +Return the selected FTS aux index suffix. +The index is used as given; no range check is made here. */ +UNIV_INLINE +const char* +fts_get_suffix( +/*===========*/ + ulint selected) /*!< in: selected index */ +{ + return(fts_index_selector[selected].suffix); +} + +/******************************************************************//** +Get the number of index selectors. +@return The number of selectors, i.e. the number of entries that precede +the first entry whose value is 0 */ +UNIV_INLINE +ulint +fts_get_n_selectors(void) +/*=====================*/ +{ + ulint i = 0; + + // FIXME: This is a hack + while (fts_index_selector[i].value != 0) { + ++i; + } + + return(i); +} + +#endif /* INNOBASE_FTS0TYPES_IC */ diff --git a/storage/innobase/include/fts0vlc.ic b/storage/innobase/include/fts0vlc.ic new file mode 100644 index 00000000000..e79bcf59347 --- /dev/null +++ b/storage/innobase/include/fts0vlc.ic @@ -0,0 +1,142 @@ +/***************************************************************************** + +Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/fts0vlc.ic +Full text variable length integer encoding/decoding. + +Created 2007-03-27 Sunny Bains +*******************************************************/ + +#ifndef INNOBASE_FTS0VLC_IC +#define INNOBASE_FTS0VLC_IC + +#include "fts0types.h" + +/******************************************************************//** +Return length of val if it were encoded using our VLC scheme. +The thresholds are 2^7 - 1, 2^14 - 1, 2^21 - 1 and 2^28 - 1: each encoded +byte carries 7 bits of the value. +FIXME: We will need to be able to encode 8-byte values +@return length of value encoded, in bytes */ +UNIV_INLINE +ulint +fts_get_encoded_len( +/*================*/ + ulint val) /* in: value to encode */ +{ + if (val <= 127) { + return(1); + } else if (val <= 16383) { + return(2); + } else if (val <= 2097151) { + return(3); + } else if (val <= 268435455) { + return(4); + } else { + /* Possibly we should care that on 64-bit machines ulint can + contain values that we can't encode in 5 bytes, but + fts_encode_int doesn't handle them either so it doesn't much + matter. */ + + return(5); + } +} + +/******************************************************************//** +Encode an integer using our VLC scheme and return the length in bytes. 
+@return length of value encoded, in bytes */ +UNIV_INLINE +ulint +fts_encode_int( +/*===========*/ + ulint val, /* in: value to encode */ + byte* buf) /* out: buffer, must have enough space + (at most 5 bytes are written) */ +{ + ulint len; + + /* The value is written as 7-bit groups, most significant group + first; only the final byte gets its high bit set (see below). */ + if (val <= 127) { + *buf = (byte) val; + + len = 1; + } else if (val <= 16383) { + *buf++ = (byte)(val >> 7); + *buf = (byte)(val & 0x7F); + + len = 2; + } else if (val <= 2097151) { + *buf++ = (byte)(val >> 14); + *buf++ = (byte)((val >> 7) & 0x7F); + *buf = (byte)(val & 0x7F); + + len = 3; + } else if (val <= 268435455) { + *buf++ = (byte)(val >> 21); + *buf++ = (byte)((val >> 14) & 0x7F); + *buf++ = (byte)((val >> 7) & 0x7F); + *buf = (byte)(val & 0x7F); + + len = 4; + } else { + /* Best to keep the limitations of the 32/64 bit versions + identical, at least for the time being. */ + ut_ad(val <= 4294967295u); + + *buf++ = (byte)(val >> 28); + *buf++ = (byte)((val >> 21) & 0x7F); + *buf++ = (byte)((val >> 14) & 0x7F); + *buf++ = (byte)((val >> 7) & 0x7F); + *buf = (byte)(val & 0x7F); + + len = 5; + } + + /* High-bit on means "last byte in the encoded integer". */ + *buf |= 0x80; + + return(len); +} + +/******************************************************************//** +Decode and return the integer that was encoded using our VLC scheme. +@return value decoded */ +UNIV_INLINE +ulint +fts_decode_vlc( +/*===========*/ + byte** ptr) /* in/out: ptr to decode from, this ptr is + incremented by the number of bytes decoded */ +{ + ulint val = 0; + + for (;;) { + byte b = **ptr; + + ++*ptr; + val |= (b & 0x7F); + + /* High-bit on means "last byte in the encoded integer". 
*/ + if (b & 0x80) { + break; + } else { + val <<= 7; + } + } + + return(val); +} + +#endif diff --git a/storage/innobase/include/fut0fut.h b/storage/innobase/include/fut0fut.h index dce20b3bad6..851cdb44cdf 100644 --- a/storage/innobase/include/fut0fut.h +++ b/storage/innobase/include/fut0fut.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/fut0fut.ic b/storage/innobase/include/fut0fut.ic index 0b52719a055..b065b10b9ca 100644 --- a/storage/innobase/include/fut0fut.ic +++ b/storage/innobase/include/fut0fut.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/fut0lst.h b/storage/innobase/include/fut0lst.h index fe024c2498f..90f9a65d4fa 100644 --- a/storage/innobase/include/fut0lst.h +++ b/storage/innobase/include/fut0lst.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/fut0lst.ic b/storage/innobase/include/fut0lst.ic index dcd13c61871..d18cf21378f 100644 --- a/storage/innobase/include/fut0lst.ic +++ b/storage/innobase/include/fut0lst.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. 
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/ha0ha.h b/storage/innobase/include/ha0ha.h index 83a7394123f..1a2b8dac014 100644 --- a/storage/innobase/include/ha0ha.h +++ b/storage/innobase/include/ha0ha.h @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -45,9 +45,10 @@ ha_search_and_get_data( ulint fold); /*!< in: folded value of the searched data */ /*********************************************************//** Looks for an element when we know the pointer to the data and updates -the pointer to data if found. */ +the pointer to data if found. 
+@return TRUE if found */ UNIV_INTERN -void +ibool ha_search_and_update_if_found_func( /*===============================*/ hash_table_t* table, /*!< in/out: hash table */ @@ -92,8 +93,12 @@ ha_create_func( ulint mutex_level, /*!< in: level of the mutexes in the latching order: this is used in the debug version */ #endif /* UNIV_SYNC_DEBUG */ - ulint n_mutexes); /*!< in: number of mutexes to protect the + ulint n_mutexes, /*!< in: number of mutexes to protect the hash table: must be a power of 2, or 0 */ + ulint type); /*!< in: type of datastructure for which + the memory heap is going to be used e.g.: + MEM_HEAP_FOR_BTR_SEARCH or + MEM_HEAP_FOR_PAGE_HASH */ #ifdef UNIV_SYNC_DEBUG /** Creates a hash table. @return own: created table @@ -102,7 +107,7 @@ chosen to be a slightly bigger prime number. @param level in: level of the mutexes in the latching order @param n_m in: number of mutexes to protect the hash table; must be a power of 2, or 0 */ -# define ha_create(n_c,n_m,level) ha_create_func(n_c,level,n_m) +# define ha_create(n_c,n_m,type,level) ha_create_func(n_c,level,n_m,type) #else /* UNIV_SYNC_DEBUG */ /** Creates a hash table. @return own: created table @@ -111,10 +116,18 @@ chosen to be a slightly bigger prime number. @param level in: level of the mutexes in the latching order @param n_m in: number of mutexes to protect the hash table; must be a power of 2, or 0 */ -# define ha_create(n_c,n_m,level) ha_create_func(n_c,n_m) +# define ha_create(n_c,n_m,type,level) ha_create_func(n_c,n_m,type) #endif /* UNIV_SYNC_DEBUG */ /*************************************************************//** +Empties a hash table and frees the memory heaps. */ +UNIV_INTERN +void +ha_clear( +/*=====*/ + hash_table_t* table); /*!< in, own: hash table */ + +/*************************************************************//** Inserts an entry into a hash table. If an entry with the same fold number is found, its node is updated to point to the new data, and no new node is inserted. 
@@ -131,7 +144,7 @@ ha_insert_for_fold_func( #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG buf_block_t* block, /*!< in: buffer block containing the data */ #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - rec_t* data); /*!< in: data, must not be NULL */ + const rec_t* data); /*!< in: data, must not be NULL */ #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG /** @@ -143,7 +156,10 @@ is inserted. @param f in: folded value of data @param b in: buffer block containing the data @param d in: data, must not be NULL */ -# define ha_insert_for_fold(t,f,b,d) ha_insert_for_fold_func(t,f,b,d) +# define ha_insert_for_fold(t,f,b,d) do { \ + ha_insert_for_fold_func(t,f,b,d); \ + MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_ADDED); \ +} while(0) #else /* UNIV_AHI_DEBUG || UNIV_DEBUG */ /** Inserts an entry into a hash table. If an entry with the same fold number @@ -154,7 +170,10 @@ is inserted. @param f in: folded value of data @param b ignored: buffer block containing the data @param d in: data, must not be NULL */ -# define ha_insert_for_fold(t,f,b,d) ha_insert_for_fold_func(t,f,d) +# define ha_insert_for_fold(t,f,b,d) do { \ + ha_insert_for_fold_func(t,f,d); \ + MONITOR_INC(MONITOR_ADAPTIVE_HASH_ROW_ADDED); \ +} while (0) #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ /*********************************************************//** @@ -214,20 +233,33 @@ struct ha_node_struct { ulint fold; /*!< fold value for the data */ }; -#ifndef UNIV_HOTBACKUP -/** Assert that the current thread is holding the mutex protecting a -hash bucket corresponding to a fold value. -@param table in: hash table -@param fold in: fold value */ -# define ASSERT_HASH_MUTEX_OWN(table, fold) \ - ut_ad(!(table)->mutexes || mutex_own(hash_get_mutex(table, fold))) -#else /* !UNIV_HOTBACKUP */ -/** Assert that the current thread is holding the mutex protecting a -hash bucket corresponding to a fold value. 
-@param table in: hash table -@param fold in: fold value */ -# define ASSERT_HASH_MUTEX_OWN(table, fold) ((void) 0) -#endif /* !UNIV_HOTBACKUP */ +#ifdef UNIV_DEBUG +/********************************************************************//** +Assert that the synchronization object in a hash operation involving +possible change in the hash table is held. +Note that in case of mutexes we assert that mutex is owned while in case +of rw-locks we assert that it is held in exclusive mode. */ +UNIV_INLINE +void +hash_assert_can_modify( +/*===================*/ + hash_table_t* table, /*!< in: hash table */ + ulint fold); /*!< in: fold value */ +/********************************************************************//** +Assert that the synchronization object in a hash search operation is held. +Note that in case of mutexes we assert that mutex is owned while in case +of rw-locks we assert that it is held either in x-mode or s-mode. */ +UNIV_INLINE +void +hash_assert_can_search( +/*===================*/ + hash_table_t* table, /*!< in: hash table */ + ulint fold); /*!< in: fold value */ +#else /* UNIV_DEBUG */ +#define hash_assert_can_modify(t, f) +#define hash_assert_can_search(t, f) +#endif /* UNIV_DEBUG */ + #ifndef UNIV_NONINL #include "ha0ha.ic" diff --git a/storage/innobase/include/ha0ha.ic b/storage/innobase/include/ha0ha.ic index aec28398b5d..91794e8f1fc 100644 --- a/storage/innobase/include/ha0ha.ic +++ b/storage/innobase/include/ha0ha.ic @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -106,6 +106,56 @@ ha_chain_get_first( hash_get_nth_cell(table, hash_calc_hash(fold, table))->node); } +#ifdef UNIV_DEBUG +/********************************************************************//** +Assert that the synchronization object in a hash operation involving +possible change in the hash table is held. +Note that in case of mutexes we assert that mutex is owned while in case +of rw-locks we assert that it is held in exclusive mode. */ +UNIV_INLINE +void +hash_assert_can_modify( +/*===================*/ + hash_table_t* table, /*!< in: hash table */ + ulint fold) /*!< in: fold value */ +{ + if (table->type == HASH_TABLE_SYNC_MUTEX) { + ut_ad(mutex_own(hash_get_mutex(table, fold))); + } else if (table->type == HASH_TABLE_SYNC_RW_LOCK) { +# ifdef UNIV_SYNC_DEBUG + rw_lock_t* lock = hash_get_lock(table, fold); + ut_ad(rw_lock_own(lock, RW_LOCK_EX)); +# endif + } else { + ut_ad(table->type == HASH_TABLE_SYNC_NONE); + } +} + +/********************************************************************//** +Assert that the synchronization object in a hash search operation is held. +Note that in case of mutexes we assert that mutex is owned while in case +of rw-locks we assert that it is held either in x-mode or s-mode. 
*/ +UNIV_INLINE +void +hash_assert_can_search( +/*===================*/ + hash_table_t* table, /*!< in: hash table */ + ulint fold) /*!< in: fold value */ +{ + if (table->type == HASH_TABLE_SYNC_MUTEX) { + ut_ad(mutex_own(hash_get_mutex(table, fold))); + } else if (table->type == HASH_TABLE_SYNC_RW_LOCK) { +# ifdef UNIV_SYNC_DEBUG + rw_lock_t* lock = hash_get_lock(table, fold); + ut_ad(rw_lock_own(lock, RW_LOCK_EX) + || rw_lock_own(lock, RW_LOCK_SHARED)); +# endif + } else { + ut_ad(table->type == HASH_TABLE_SYNC_NONE); + } +} +#endif /* UNIV_DEBUG */ + /*************************************************************//** Looks for an element in a hash table. @return pointer to the data of the first hash table node in chain @@ -119,10 +169,7 @@ ha_search_and_get_data( { ha_node_t* node; - ASSERT_HASH_MUTEX_OWN(table, fold); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ + hash_assert_can_search(table, fold); ut_ad(btr_search_enabled); node = ha_chain_get_first(table, fold); @@ -152,7 +199,7 @@ ha_search_with_data( { ha_node_t* node; - ASSERT_HASH_MUTEX_OWN(table, fold); + hash_assert_can_search(table, fold); ut_ad(btr_search_enabled); @@ -184,10 +231,7 @@ ha_search_and_delete_if_found( { ha_node_t* node; - ASSERT_HASH_MUTEX_OWN(table, fold); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ + hash_assert_can_modify(table, fold); ut_ad(btr_search_enabled); node = ha_search_with_data(table, fold, data); diff --git a/storage/innobase/include/ha0storage.h b/storage/innobase/include/ha0storage.h index c30bd840579..caf42abfcfe 100644 --- a/storage/innobase/include/ha0storage.h +++ b/storage/innobase/include/ha0storage.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/ha0storage.ic b/storage/innobase/include/ha0storage.ic index 5acbf82f005..ce6e7406b43 100644 --- a/storage/innobase/include/ha0storage.ic +++ b/storage/innobase/include/ha0storage.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h index edf7a1a28c1..f2317054c7f 100644 --- a/storage/innobase/include/ha_prototypes.h +++ b/storage/innobase/include/ha_prototypes.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2006, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 2006, 2010, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -27,9 +27,17 @@ Created 5/11/2006 Osku Salerma #ifndef HA_INNODB_PROTOTYPES_H #define HA_INNODB_PROTOTYPES_H +#include "my_dbug.h" +#include "my_compare.h" +#include "my_sys.h" +#include "m_string.h" + #include "trx0types.h" #include "m_ctype.h" /* CHARSET_INFO */ +// Forward declaration +typedef struct fts_string_struct fts_string_t; + /*********************************************************************//** Wrapper around MySQL's copy_and_convert function. @return number of bytes copied to 'to' */ @@ -43,7 +51,8 @@ innobase_convert_string( CHARSET_INFO* to_cs, /*!< in: character set to convert to */ const void* from, /*!< in: string to convert */ ulint from_length, /*!< in: number of bytes to convert */ - CHARSET_INFO* from_cs, /*!< in: character set to convert from */ + CHARSET_INFO* from_cs, /*!< in: character set to convert + from */ uint* errors); /*!< out: number of errors encountered during the conversion */ @@ -136,6 +145,23 @@ innobase_mysql_print_thd( uint max_query_len); /*!< in: max query length to print, or 0 to use the default max length */ +/*************************************************************//** +InnoDB uses this function to compare two data fields for which the data type +is such that we must use MySQL code to compare them. 
+@return 1, 0, -1, if a is greater, equal, less than b, respectively */ +UNIV_INTERN +int +innobase_mysql_cmp( +/*===============*/ + int mysql_type, /*!< in: MySQL type */ + uint charset_number, /*!< in: number of the charset */ + const unsigned char* a, /*!< in: data field */ + unsigned int a_length, /*!< in: data field length, + not UNIV_SQL_NULL */ + const unsigned char* b, /*!< in: data field */ + unsigned int b_length) /*!< in: data field length, + not UNIV_SQL_NULL */ + __attribute__((nonnull, warn_unused_result)); /**************************************************************//** Converts a MySQL type to an InnoDB type. Note that this function returns the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1 @@ -174,6 +200,17 @@ innobase_strcasecmp( const char* b); /*!< in: second string to compare */ /******************************************************************//** +Compares NUL-terminated UTF-8 strings case insensitively. The +second string contains wildcards. +@return 0 if a match is found, 1 if not */ +UNIV_INTERN +int +innobase_wildcasecmp( +/*=================*/ + const char* a, /*!< in: string to compare */ + const char* b); /*!< in: wildcard string to compare */ + +/******************************************************************//** Strip dir name from a full path name and return only its file name. @return file name or "null" if no file name */ UNIV_INTERN @@ -211,8 +248,8 @@ innobase_convert_from_id( struct charset_info_st* cs, /*!< in: the 'from' character set */ char* to, /*!< out: converted identifier */ const char* from, /*!< in: identifier to convert */ - ulint len); /*!< in: length of 'to', in bytes; should - be at least 3 * strlen(to) + 1 */ + ulint len); /*!< in: length of 'to', in bytes; + should be at least 3 * strlen(to) + 1 */ /******************************************************************//** Makes all characters in a NUL-terminated UTF-8 string lower case. 
*/ UNIV_INTERN @@ -256,11 +293,20 @@ innobase_get_at_most_n_mbchars( ulint data_len, /*!< in: length of the string in bytes */ const char* str); /*!< in: character string */ +/*************************************************************//** +InnoDB index push-down condition check +@return ICP_NO_MATCH, ICP_MATCH, or ICP_OUT_OF_RANGE */ +UNIV_INTERN +enum icp_result +innobase_index_cond( +/*================*/ + void* file) /*!< in/out: pointer to ha_innobase */ + __attribute__((nonnull, warn_unused_result)); /******************************************************************//** Returns true if the thread supports XA, global value of innodb_supports_xa if thd is NULL. @return true if thd supports XA */ - +UNIV_INTERN ibool thd_supports_xa( /*============*/ @@ -282,8 +328,18 @@ UNIV_INTERN void thd_set_lock_wait_time( /*===================*/ - void* thd, /*!< in: thread handle (THD*) */ - ulint value); /*!< in: time waited for the lock */ + void* thd, /*!< in: thread handle (THD*) */ + ulint value); /*!< in: time waited for the lock */ + +/**********************************************************************//** +Get the current setting of the table_cache_size global parameter. We do +a dirty read because for one there is no synchronization object and +secondly there is little harm in doing so even if we get a torn read. +@return value of table_cache_size */ +UNIV_INTERN +ulint +innobase_get_table_cache_size(void); +/*===============================*/ /**********************************************************************//** Get the current setting of the lower_case_table_names global parameter from @@ -296,4 +352,66 @@ ulint innobase_get_lower_case_table_names(void); /*=====================================*/ -#endif +/*************************************************************//** +Get the next token from the given string and store it in *token. 
*/ +UNIV_INTERN +ulint +innobase_mysql_fts_get_token( +/*=========================*/ + CHARSET_INFO* charset, /*!< in: Character set */ + byte* start, /*!< in: start of text */ + byte* end, /*!< in: one character past end of + text */ + fts_string_t* token, /*!< out: token's text */ + ulint* offset); /*!< out: offset to token, + measured as characters from + 'start' */ + +/******************************************************************//** +compare two character string case insensitively according to their charset. */ +UNIV_INTERN +int +innobase_fts_text_case_cmp( +/*=======================*/ + const void* cs, /*!< in: Character set */ + const void* p1, /*!< in: key */ + const void* p2); /*!< in: node */ + +/******************************************************************//** +compare two character string according to their charset. */ +UNIV_INTERN +int +innobase_fts_string_cmp( +/*====================*/ + const void* cs, /*!< in: Character set */ + const void* p1, /*!< in: key */ + const void* p2); /*!< in: node */ + +/****************************************************************//** +Get FTS field charset info from the field's prtype +@return charset info */ +UNIV_INTERN +CHARSET_INFO* +innobase_get_fts_charset( +/*=====================*/ + int mysql_type, /*!< in: MySQL type */ + uint charset_number);/*!< in: number of the charset */ +/******************************************************************//** +Returns true if transaction should be flagged as read-only. +@return true if the thd is marked as read-only */ +UNIV_INTERN +ibool +thd_trx_is_read_only( +/*=================*/ + void* thd); /*!< in: thread handle (THD*) */ + +/******************************************************************//** +Check if the transaction is an auto-commit transaction. TRUE also +implies that it is a SELECT (read-only) transaction. +@return true if the transaction is an auto commit read-only transaction. 
*/ +UNIV_INTERN +ibool +thd_trx_is_auto_commit( +/*===================*/ + void* thd); /*!< in: thread handle (THD*) can be NULL */ +#endif /* HA_INNODB_PROTOTYPES_H */ diff --git a/storage/innobase/include/handler0alter.h b/storage/innobase/include/handler0alter.h index 017fe88d533..c5d439ef21b 100644 --- a/storage/innobase/include/handler0alter.h +++ b/storage/innobase/include/handler0alter.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2005, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 2005, 2010, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/hash0hash.h b/storage/innobase/include/hash0hash.h index b17c21a45ef..1c19ea53a23 100644 --- a/storage/innobase/include/hash0hash.h +++ b/storage/innobase/include/hash0hash.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -30,6 +30,7 @@ Created 5/20/1997 Heikki Tuuri #include "mem0mem.h" #ifndef UNIV_HOTBACKUP # include "sync0sync.h" +# include "sync0rw.h" #endif /* !UNIV_HOTBACKUP */ typedef struct hash_table_struct hash_table_t; @@ -40,6 +41,18 @@ typedef void* hash_node_t; /* Fix Bug #13859: symbol collision between imap/mysql */ #define hash_create hash0_create +/* Different types of hash_table based on the synchronization +method used for it. */ +enum hash_table_sync_t { + HASH_TABLE_SYNC_NONE = 0, /*!< Don't use any internal + synchronization objects for + this hash_table. */ + HASH_TABLE_SYNC_MUTEX, /*!< Use mutexes to control + access to this hash_table. */ + HASH_TABLE_SYNC_RW_LOCK /*!< Use rw_locks to control + access to this hash_table. */ +}; + /*************************************************************//** Creates a hash table with >= n array cells. The actual number of cells is chosen to be a prime number slightly bigger than n. @@ -51,21 +64,29 @@ hash_create( ulint n); /*!< in: number of array cells */ #ifndef UNIV_HOTBACKUP /*************************************************************//** -Creates a mutex array to protect a hash table. */ +Creates a sync object array to protect a hash table. 
+::sync_obj can be mutexes or rw_locks depending on the type of +hash table. */ UNIV_INTERN void -hash_create_mutexes_func( -/*=====================*/ - hash_table_t* table, /*!< in: hash table */ +hash_create_sync_obj_func( +/*======================*/ + hash_table_t* table, /*!< in: hash table */ + enum hash_table_sync_t type, /*!< in: HASH_TABLE_SYNC_MUTEX + or HASH_TABLE_SYNC_RW_LOCK */ #ifdef UNIV_SYNC_DEBUG - ulint sync_level, /*!< in: latching order level of the - mutexes: used in the debug version */ + ulint sync_level,/*!< in: latching order level + of the mutexes: used in the + debug version */ #endif /* UNIV_SYNC_DEBUG */ - ulint n_mutexes); /*!< in: number of mutexes */ + ulint n_sync_obj);/*!< in: number of sync objects, + must be a power of 2 */ #ifdef UNIV_SYNC_DEBUG -# define hash_create_mutexes(t,n,level) hash_create_mutexes_func(t,level,n) +# define hash_create_sync_obj(t, s, n, level) \ + hash_create_sync_obj_func(t, s, level, n) #else /* UNIV_SYNC_DEBUG */ -# define hash_create_mutexes(t,n,level) hash_create_mutexes_func(t,n) +# define hash_create_sync_obj(t, s, n, level) \ + hash_create_sync_obj_func(t, s, n) #endif /* UNIV_SYNC_DEBUG */ #endif /* !UNIV_HOTBACKUP */ @@ -87,11 +108,12 @@ hash_calc_hash( hash_table_t* table); /*!< in: hash table */ #ifndef UNIV_HOTBACKUP /********************************************************************//** -Assert that the mutex for the table in a hash operation is owned. 
*/ -# define HASH_ASSERT_OWNED(TABLE, FOLD) \ -ut_ad(!(TABLE)->mutexes || mutex_own(hash_get_mutex(TABLE, FOLD))); +Assert that the mutex for the table is held */ +# define HASH_ASSERT_OWN(TABLE, FOLD) \ + ut_ad((TABLE)->type != HASH_TABLE_SYNC_MUTEX \ + || (mutex_own(hash_get_mutex((TABLE), FOLD)))); #else /* !UNIV_HOTBACKUP */ -# define HASH_ASSERT_OWNED(TABLE, FOLD) +# define HASH_ASSERT_OWN(TABLE, FOLD) #endif /* !UNIV_HOTBACKUP */ /*******************************************************************//** @@ -102,7 +124,7 @@ do {\ hash_cell_t* cell3333;\ TYPE* struct3333;\ \ - HASH_ASSERT_OWNED(TABLE, FOLD)\ + HASH_ASSERT_OWN(TABLE, FOLD)\ \ (DATA)->NAME = NULL;\ \ @@ -124,7 +146,7 @@ do {\ #ifdef UNIV_HASH_DEBUG # define HASH_ASSERT_VALID(DATA) ut_a((void*) (DATA) != (void*) -1) -# define HASH_INVALIDATE(DATA, NAME) DATA->NAME = (void*) -1 +# define HASH_INVALIDATE(DATA, NAME) *(void**) (&DATA->NAME) = (void*) -1 #else # define HASH_ASSERT_VALID(DATA) do {} while (0) # define HASH_INVALIDATE(DATA, NAME) do {} while (0) @@ -138,7 +160,7 @@ do {\ hash_cell_t* cell3333;\ TYPE* struct3333;\ \ - HASH_ASSERT_OWNED(TABLE, FOLD)\ + HASH_ASSERT_OWN(TABLE, FOLD)\ \ cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\ \ @@ -175,7 +197,7 @@ Looks for a struct in a hash table. 
*/ #define HASH_SEARCH(NAME, TABLE, FOLD, TYPE, DATA, ASSERTION, TEST)\ {\ \ - HASH_ASSERT_OWNED(TABLE, FOLD)\ + HASH_ASSERT_OWN(TABLE, FOLD)\ \ (DATA) = (TYPE) HASH_GET_FIRST(TABLE, hash_calc_hash(FOLD, TABLE));\ HASH_ASSERT_VALID(DATA);\ @@ -259,7 +281,7 @@ do {\ \ HASH_DELETE(TYPE, NAME, TABLE, fold111, NODE);\ \ - top_node111 = (TYPE*)mem_heap_get_top(\ + top_node111 = (TYPE*) mem_heap_get_top(\ hash_get_heap(TABLE, fold111),\ sizeof(TYPE));\ \ @@ -284,11 +306,12 @@ do {\ } else {\ /* We have to look for the predecessor of the top\ node */\ - node111 = cell111->node;\ + node111 = static_cast<TYPE*>(cell111->node);\ \ while (top_node111 != HASH_GET_NEXT(NAME, node111)) {\ \ - node111 = HASH_GET_NEXT(NAME, node111);\ + node111 = static_cast<TYPE*>(\ + HASH_GET_NEXT(NAME, node111));\ }\ \ /* Now we have the predecessor node */\ @@ -329,12 +352,12 @@ do {\ } while (0) /************************************************************//** -Gets the mutex index for a fold value in a hash table. -@return mutex number */ +Gets the sync object index for a fold value in a hash table. +@return index */ UNIV_INLINE ulint -hash_get_mutex_no( -/*==============*/ +hash_get_sync_obj_index( +/*====================*/ hash_table_t* table, /*!< in: hash table */ ulint fold); /*!< in: fold */ /************************************************************//** @@ -365,6 +388,15 @@ hash_get_nth_mutex( hash_table_t* table, /*!< in: hash table */ ulint i); /*!< in: index of the mutex */ /************************************************************//** +Gets the nth rw_lock in a hash table. +@return rw_lock */ +UNIV_INLINE +rw_lock_t* +hash_get_nth_lock( +/*==============*/ + hash_table_t* table, /*!< in: hash table */ + ulint i); /*!< in: index of the rw_lock */ +/************************************************************//** Gets the mutex for a fold value in a hash table. 
@return mutex */ UNIV_INLINE @@ -374,6 +406,15 @@ hash_get_mutex( hash_table_t* table, /*!< in: hash table */ ulint fold); /*!< in: fold */ /************************************************************//** +Gets the rw_lock for a fold value in a hash table. +@return rw_lock */ +UNIV_INLINE +rw_lock_t* +hash_get_lock( +/*==========*/ + hash_table_t* table, /*!< in: hash table */ + ulint fold); /*!< in: fold */ +/************************************************************//** Reserves the mutex for a fold value in a hash table. */ UNIV_INTERN void @@ -403,10 +444,84 @@ void hash_mutex_exit_all( /*================*/ hash_table_t* table); /*!< in: hash table */ +/************************************************************//** +Releases all but the passed in mutex of a hash table. */ +UNIV_INTERN +void +hash_mutex_exit_all_but( +/*====================*/ + hash_table_t* table, /*!< in: hash table */ + mutex_t* keep_mutex); /*!< in: mutex to keep */ +/************************************************************//** +s-lock a lock for a fold value in a hash table. */ +UNIV_INTERN +void +hash_lock_s( +/*========*/ + hash_table_t* table, /*!< in: hash table */ + ulint fold); /*!< in: fold */ +/************************************************************//** +x-lock a lock for a fold value in a hash table. */ +UNIV_INTERN +void +hash_lock_x( +/*========*/ + hash_table_t* table, /*!< in: hash table */ + ulint fold); /*!< in: fold */ +/************************************************************//** +unlock an s-lock for a fold value in a hash table. */ +UNIV_INTERN +void +hash_unlock_s( +/*==========*/ + + hash_table_t* table, /*!< in: hash table */ + ulint fold); /*!< in: fold */ +/************************************************************//** +unlock x-lock for a fold value in a hash table. 
*/ +UNIV_INTERN +void +hash_unlock_x( +/*==========*/ + hash_table_t* table, /*!< in: hash table */ + ulint fold); /*!< in: fold */ +/************************************************************//** +Reserves all the locks of a hash table, in an ascending order. */ +UNIV_INTERN +void +hash_lock_x_all( +/*============*/ + hash_table_t* table); /*!< in: hash table */ +/************************************************************//** +Releases all the locks of a hash table, in an ascending order. */ +UNIV_INTERN +void +hash_unlock_x_all( +/*==============*/ + hash_table_t* table); /*!< in: hash table */ +/************************************************************//** +Releases all but the passed in lock of a hash table. */ +UNIV_INTERN +void +hash_unlock_x_all_but( +/*==================*/ + hash_table_t* table, /*!< in: hash table */ + rw_lock_t* keep_lock); /*!< in: lock to keep */ + #else /* !UNIV_HOTBACKUP */ # define hash_get_heap(table, fold) ((table)->heap) # define hash_mutex_enter(table, fold) ((void) 0) # define hash_mutex_exit(table, fold) ((void) 0) +# define hash_mutex_enter_all(table) ((void) 0) +# define hash_mutex_exit_all(table) ((void) 0) +# define hash_mutex_exit_all_but(t, m) ((void) 0) +# define hash_lock_s(t, f) ((void) 0) +# define hash_lock_x(t, f) ((void) 0) +# define hash_unlock_s(t, f) ((void) 0) +# define hash_unlock_x(t, f) ((void) 0) +# define hash_lock_x_all(t) ((void) 0) +# define hash_unlock_x_all(t) ((void) 0) +# define hash_unlock_x_all_but(t, l) ((void) 0) #endif /* !UNIV_HOTBACKUP */ struct hash_cell_struct{ @@ -415,27 +530,40 @@ struct hash_cell_struct{ /* The hash table structure */ struct hash_table_struct { + enum hash_table_sync_t type; /*!< type of hash_table. 
*/ #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG # ifndef UNIV_HOTBACKUP - ibool adaptive;/* TRUE if this is the hash table of the - adaptive hash index */ + ibool adaptive;/* TRUE if this is the hash + table of the adaptive hash + index */ # endif /* !UNIV_HOTBACKUP */ #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ - ulint n_cells;/* number of cells in the hash table */ - hash_cell_t* array; /*!< pointer to cell array */ + ulint n_cells;/* number of cells in the hash table */ + hash_cell_t* array; /*!< pointer to cell array */ #ifndef UNIV_HOTBACKUP - ulint n_mutexes;/* if mutexes != NULL, then the number of - mutexes, must be a power of 2 */ - mutex_t* mutexes;/* NULL, or an array of mutexes used to - protect segments of the hash table */ - mem_heap_t** heaps; /*!< if this is non-NULL, hash chain nodes for - external chaining can be allocated from these - memory heaps; there are then n_mutexes many of - these heaps */ + ulint n_sync_obj;/* if sync_objs != NULL, then + either the number + of mutexes or the number of + rw_locks depending on the type. 
+ Must be a power of 2 */ + union { + mutex_t* mutexes;/* NULL, or an array of mutexes + used to protect segments of the + hash table */ + rw_lock_t* rw_locks;/* NULL, or an array of rw_locks + used to protect segments of the + hash table */ + } sync_obj; + + mem_heap_t** heaps; /*!< if this is non-NULL, hash + chain nodes for external chaining + can be allocated from these memory + heaps; there are then n_sync_obj + many of these heaps */ #endif /* !UNIV_HOTBACKUP */ - mem_heap_t* heap; + mem_heap_t* heap; #ifdef UNIV_DEBUG - ulint magic_n; + ulint magic_n; # define HASH_TABLE_MAGIC_N 76561114 #endif /* UNIV_DEBUG */ }; diff --git a/storage/innobase/include/hash0hash.ic b/storage/innobase/include/hash0hash.ic index 0b437894e2e..1e5474601d5 100644 --- a/storage/innobase/include/hash0hash.ic +++ b/storage/innobase/include/hash0hash.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -87,20 +87,21 @@ hash_calc_hash( #ifndef UNIV_HOTBACKUP /************************************************************//** -Gets the mutex index for a fold value in a hash table. 
-@return mutex number */ +Gets the sync object index for a fold value in a hash table. +@return index */ UNIV_INLINE ulint -hash_get_mutex_no( -/*==============*/ +hash_get_sync_obj_index( +/*====================*/ hash_table_t* table, /*!< in: hash table */ ulint fold) /*!< in: fold */ { ut_ad(table); ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); - ut_ad(ut_is_2pow(table->n_mutexes)); + ut_ad(table->type != HASH_TABLE_SYNC_NONE); + ut_ad(ut_is_2pow(table->n_sync_obj)); return(ut_2pow_remainder(hash_calc_hash(fold, table), - table->n_mutexes)); + table->n_sync_obj)); } /************************************************************//** @@ -115,7 +116,8 @@ hash_get_nth_heap( { ut_ad(table); ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); - ut_ad(i < table->n_mutexes); + ut_ad(table->type != HASH_TABLE_SYNC_NONE); + ut_ad(i < table->n_sync_obj); return(table->heaps[i]); } @@ -139,7 +141,7 @@ hash_get_heap( return(table->heap); } - i = hash_get_mutex_no(table, fold); + i = hash_get_sync_obj_index(table, fold); return(hash_get_nth_heap(table, i)); } @@ -156,9 +158,10 @@ hash_get_nth_mutex( { ut_ad(table); ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); - ut_ad(i < table->n_mutexes); + ut_ad(table->type == HASH_TABLE_SYNC_MUTEX); + ut_ad(i < table->n_sync_obj); - return(table->mutexes + i); + return(table->sync_obj.mutexes + i); } /************************************************************//** @@ -176,8 +179,47 @@ hash_get_mutex( ut_ad(table); ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); - i = hash_get_mutex_no(table, fold); + i = hash_get_sync_obj_index(table, fold); return(hash_get_nth_mutex(table, i)); } + +/************************************************************//** +Gets the nth rw_lock in a hash table. 
+@return rw_lock */ +UNIV_INLINE +rw_lock_t* +hash_get_nth_lock( +/*==============*/ + hash_table_t* table, /*!< in: hash table */ + ulint i) /*!< in: index of the rw_lock */ +{ + ut_ad(table); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); + ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK); + ut_ad(i < table->n_sync_obj); + + return(table->sync_obj.rw_locks + i); +} + +/************************************************************//** +Gets the rw_lock for a fold value in a hash table. +@return rw_lock */ +UNIV_INLINE +rw_lock_t* +hash_get_lock( +/*==========*/ + hash_table_t* table, /*!< in: hash table */ + ulint fold) /*!< in: fold */ +{ + ulint i; + + ut_ad(table); + ut_ad(table->type == HASH_TABLE_SYNC_RW_LOCK); + ut_ad(table->magic_n == HASH_TABLE_MAGIC_N); + + i = hash_get_sync_obj_index(table, fold); + + return(hash_get_nth_lock(table, i)); +} #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/include/ibuf0ibuf.h b/storage/innobase/include/ibuf0ibuf.h index 28c97fd609f..f405ebf8d11 100644 --- a/storage/innobase/include/ibuf0ibuf.h +++ b/storage/innobase/include/ibuf0ibuf.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -35,6 +35,10 @@ Created 7/19/1997 Heikki Tuuri #ifndef UNIV_HOTBACKUP # include "ibuf0types.h" +/** Default value for maximum on-disk size of change buffer in terms +of percentage of the buffer pool. */ +#define CHANGE_BUFFER_DEFAULT_SIZE (25) + /* Possible operations buffered in the insert/whatever buffer. See ibuf_insert(). DO NOT CHANGE THE VALUES OF THESE, THEY ARE STORED ON DISK. */ typedef enum { @@ -98,6 +102,14 @@ void ibuf_init_at_db_start(void); /*=======================*/ /*********************************************************************//** +Updates the max_size value for ibuf. */ +UNIV_INTERN +void +ibuf_max_size_update( +/*=================*/ + ulint new_val); /*!< in: new value in terms of + percentage of the buffer pool size */ +/*********************************************************************//** Reads the biggest tablespace id from the high end of the insert buffer tree and updates the counter in fil_system. */ UNIV_INTERN @@ -376,14 +388,12 @@ will be merged from ibuf trees to the pages read, 0 if ibuf is empty */ UNIV_INTERN ulint -ibuf_contract_for_n_pages( -/*======================*/ - ibool sync, /*!< in: TRUE if the caller wants to wait for the - issued read with the highest tablespace address - to complete */ - ulint n_pages);/*!< in: try to read at least this many pages to - the buffer pool and merge the ibuf contents to - them */ +ibuf_contract_in_background( +/*========================*/ + ibool full); /*!< in: TRUE if the caller wants to do a full + contract based on PCT_IO(100). 
If FALSE then + the size of contract batch is determined based + on the current size of the ibuf tree. */ #endif /* !UNIV_HOTBACKUP */ /*********************************************************************//** Parses a redo log record of an ibuf bitmap page init. @@ -452,6 +462,36 @@ for the file segment from which the pages for the ibuf tree are allocated */ /* The insert buffer tree itself is always located in space 0. */ #define IBUF_SPACE_ID 0 +/** Insert buffer struct */ +struct ibuf_struct{ + ulint size; /*!< current size of the ibuf index + tree, in pages */ + ulint max_size; /*!< recommended maximum size of the + ibuf index tree, in pages */ + ulint seg_size; /*!< allocated pages of the file + segment containing ibuf header and + tree */ + ibool empty; /*!< Protected by the page + latch of the root page of the + insert buffer tree + (FSP_IBUF_TREE_ROOT_PAGE_NO). TRUE + if and only if the insert + buffer tree is empty. */ + ulint free_list_len; /*!< length of the free list */ + ulint height; /*!< tree height */ + dict_index_t* index; /*!< insert buffer index */ + + ulint n_merges; /*!< number of pages merged */ + ulint n_merged_ops[IBUF_OP_COUNT]; + /*!< number of operations of each type + merged to index pages */ + ulint n_discarded_ops[IBUF_OP_COUNT]; + /*!< number of operations of each type + discarded without merging due to the + tablespace being deleted or the + index being dropped */ +}; + #ifndef UNIV_NONINL #include "ibuf0ibuf.ic" #endif diff --git a/storage/innobase/include/ibuf0ibuf.ic b/storage/innobase/include/ibuf0ibuf.ic index 0a22667a260..8a4ec633b01 100644 --- a/storage/innobase/include/ibuf0ibuf.ic +++ b/storage/innobase/include/ibuf0ibuf.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -28,9 +28,6 @@ Created 7/19/1997 Heikki Tuuri #ifndef UNIV_HOTBACKUP #include "buf0lru.h" -/** Counter for ibuf_should_try() */ -extern ulint ibuf_flush_count; - /** An index page must contain at least UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE bytes of free space for ibuf to try to buffer inserts to this page. If there is this much of free space, the @@ -61,36 +58,6 @@ ibuf_mtr_commit( mtr_commit(mtr); } -/** Insert buffer struct */ -struct ibuf_struct{ - ulint size; /*!< current size of the ibuf index - tree, in pages */ - ulint max_size; /*!< recommended maximum size of the - ibuf index tree, in pages */ - ulint seg_size; /*!< allocated pages of the file - segment containing ibuf header and - tree */ - ibool empty; /*!< Protected by the page - latch of the root page of the - insert buffer tree - (FSP_IBUF_TREE_ROOT_PAGE_NO). TRUE - if and only if the insert - buffer tree is empty. 
*/ - ulint free_list_len; /*!< length of the free list */ - ulint height; /*!< tree height */ - dict_index_t* index; /*!< insert buffer index */ - - ulint n_merges; /*!< number of pages merged */ - ulint n_merged_ops[IBUF_OP_COUNT]; - /*!< number of operations of each type - merged to index pages */ - ulint n_discarded_ops[IBUF_OP_COUNT]; - /*!< number of operations of each type - discarded without merging due to the - tablespace being deleted or the - index being dropped */ -}; - /************************************************************************//** Sets the free bit of the page in the ibuf bitmap. This is done in a separate mini-transaction, hence this operation does not restrict further work to only @@ -127,21 +94,10 @@ ibuf_should_try( a secondary index when we decide */ { - if (ibuf_use != IBUF_USE_NONE - && !dict_index_is_clust(index) - && (ignore_sec_unique || !dict_index_is_unique(index))) { - - ibuf_flush_count++; - - if (ibuf_flush_count % 4 == 0) { - - buf_LRU_try_free_flushed_blocks(NULL); - } - - return(TRUE); - } - - return(FALSE); + return(ibuf_use != IBUF_USE_NONE + && ibuf->max_size != 0 + && !dict_index_is_clust(index) + && (ignore_sec_unique || !dict_index_is_unique(index))); } /******************************************************************//** @@ -174,12 +130,11 @@ ibuf_bitmap_page( ut_ad(ut_is_2pow(zip_size)); if (!zip_size) { - return(UNIV_UNLIKELY((page_no & (UNIV_PAGE_SIZE - 1)) - == FSP_IBUF_BITMAP_OFFSET)); + return((page_no & (UNIV_PAGE_SIZE - 1)) + == FSP_IBUF_BITMAP_OFFSET); } - return(UNIV_UNLIKELY((page_no & (zip_size - 1)) - == FSP_IBUF_BITMAP_OFFSET)); + return((page_no & (zip_size - 1)) == FSP_IBUF_BITMAP_OFFSET); } /*********************************************************************//** @@ -197,7 +152,7 @@ ibuf_index_page_calc_free_bits( ulint n; ut_ad(ut_is_2pow(zip_size)); ut_ad(!zip_size || zip_size > IBUF_PAGE_SIZE_PER_FREE_SPACE); - ut_ad(zip_size <= UNIV_PAGE_SIZE); + ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX); if 
(zip_size) { n = max_ins_size @@ -232,7 +187,7 @@ ibuf_index_page_calc_free_from_bits( ut_ad(bits < 4); ut_ad(ut_is_2pow(zip_size)); ut_ad(!zip_size || zip_size > IBUF_PAGE_SIZE_PER_FREE_SPACE); - ut_ad(zip_size <= UNIV_PAGE_SIZE); + ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX); if (zip_size) { if (bits == 3) { @@ -274,9 +229,9 @@ ibuf_index_page_calc_free_zip( zip_max_ins = page_zip_max_ins_size(page_zip, FALSE/* not clustered */); - if (UNIV_UNLIKELY(zip_max_ins < 0)) { + if (zip_max_ins < 0) { return(0); - } else if (UNIV_LIKELY(max_ins_size > (ulint) zip_max_ins)) { + } else if (max_ins_size > (ulint) zip_max_ins) { max_ins_size = (ulint) zip_max_ins; } diff --git a/storage/innobase/include/ibuf0types.h b/storage/innobase/include/ibuf0types.h index 55944f879b2..e404b62a011 100644 --- a/storage/innobase/include/ibuf0types.h +++ b/storage/innobase/include/ibuf0types.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/lock0iter.h b/storage/innobase/include/lock0iter.h index 25a57c9740c..42b4f7281e4 100644 --- a/storage/innobase/include/lock0iter.h +++ b/storage/innobase/include/lock0iter.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h index 29fdc3bbe97..fa1e6d50224 100644 --- a/storage/innobase/include/lock0lock.h +++ b/storage/innobase/include/lock0lock.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. 
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -36,13 +36,12 @@ Created 5/7/1996 Heikki Tuuri #include "lock0types.h" #include "read0types.h" #include "hash0hash.h" +#include "srv0srv.h" #include "ut0vec.h" #ifdef UNIV_DEBUG extern ibool lock_print_waits; #endif /* UNIV_DEBUG */ -/* Buffer for storing information about the most recent deadlock error */ -extern FILE* lock_latest_err_file; /*********************************************************************//** Gets the size of a lock struct. @@ -65,18 +64,6 @@ void lock_sys_close(void); /*================*/ /*********************************************************************//** -Checks if some transaction has an implicit x-lock on a record in a clustered -index. -@return transaction which has the x-lock, or NULL */ -UNIV_INLINE -trx_t* -lock_clust_rec_some_has_impl( -/*=========================*/ - const rec_t* rec, /*!< in: user record */ - const dict_index_t* index, /*!< in: clustered index */ - const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */ - __attribute__((nonnull, warn_unused_result)); -/*********************************************************************//** Gets the heap_no of the smallest user record on a page. 
@return heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */ UNIV_INLINE @@ -271,14 +258,15 @@ lock_rec_restore_from_page_infimum( state; lock bits are reset on the infimum */ /*********************************************************************//** -Returns TRUE if there are explicit record locks on a page. -@return TRUE if there are explicit record locks on the page */ +Determines if there are explicit record locks on a page. +@return an explicit record lock on the page, or NULL if there are none */ UNIV_INTERN -ibool +lock_t* lock_rec_expl_exist_on_page( /*========================*/ ulint space, /*!< in: space id */ - ulint page_no);/*!< in: page number */ + ulint page_no)/*!< in: page number */ + __attribute__((warn_unused_result)); /*********************************************************************//** Checks if locks of other transactions prevent an immediate insert of a record. If they do, first tests if the query thread should anyway @@ -467,7 +455,8 @@ lock_table( /*=======*/ ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is set, does nothing */ - dict_table_t* table, /*!< in: database table in dictionary cache */ + dict_table_t* table, /*!< in/out: database table + in dictionary cache */ enum lock_mode mode, /*!< in: lock mode */ que_thr_t* thr); /*!< in: query thread */ /*************************************************************//** @@ -478,28 +467,20 @@ UNIV_INTERN void lock_rec_unlock( /*============*/ - trx_t* trx, /*!< in: transaction that has + trx_t* trx, /*!< in/out: transaction that has set a record lock */ const buf_block_t* block, /*!< in: buffer block containing rec */ const rec_t* rec, /*!< in: record */ enum lock_mode lock_mode);/*!< in: LOCK_S or LOCK_X */ /*********************************************************************//** -Releases transaction locks, and releases possible other transactions waiting -because of these locks. 
*/ +Releases a transaction's locks, and releases possible other transactions +waiting because of these locks. Change the state of the transaction to +TRX_STATE_COMMITTED_IN_MEMORY. */ UNIV_INTERN void -lock_release_off_kernel( -/*====================*/ - trx_t* trx); /*!< in: transaction */ -/*********************************************************************//** -Cancels a waiting lock request and releases possible other transactions -waiting behind it. */ -UNIV_INTERN -void -lock_cancel_waiting_and_release( -/*============================*/ - lock_t* lock); /*!< in: waiting lock request */ - +lock_trx_release_locks( +/*===================*/ + trx_t* trx); /*!< in/out: transaction */ /*********************************************************************//** Removes locks on a table to be dropped or truncated. If remove_also_table_sx_locks is TRUE then table-level S and X locks are @@ -572,8 +553,9 @@ UNIV_INTERN ibool lock_is_table_exclusive( /*====================*/ - dict_table_t* table, /*!< in: table */ - trx_t* trx); /*!< in: transaction */ + const dict_table_t* table, /*!< in: table */ + const trx_t* trx) /*!< in: transaction */ + __attribute__((nonnull)); /*********************************************************************//** Checks if a lock request lock1 has to wait for request lock2. @return TRUE if lock1 has to wait for lock2 to be removed */ @@ -587,18 +569,17 @@ lock_has_to_wait( on the same record as in lock1 if the locks are record locks */ /*********************************************************************//** -Checks that a transaction id is sensible, i.e., not in the future. -@return TRUE if ok */ +Reports that a transaction id is insensible, i.e., in the future. 
*/ UNIV_INTERN -ibool -lock_check_trx_id_sanity( -/*=====================*/ +void +lock_report_trx_id_insanity( +/*========================*/ trx_id_t trx_id, /*!< in: trx id */ const rec_t* rec, /*!< in: user record */ - dict_index_t* index, /*!< in: clustered index */ + dict_index_t* index, /*!< in: index */ const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */ - ibool has_kernel_mutex);/*!< in: TRUE if the caller owns the - kernel mutex */ + trx_id_t max_trx_id) /*!< in: trx_sys_get_max_trx_id() */ + __attribute__((nonnull)); /*********************************************************************//** Prints info of a table lock. */ UNIV_INTERN @@ -617,16 +598,19 @@ lock_rec_print( const lock_t* lock); /*!< in: record type lock */ /*********************************************************************//** Prints info of locks for all transactions. -@return FALSE if not able to obtain kernel mutex -and exits without printing info */ +@return FALSE if not able to obtain lock mutex and exits without +printing info */ UNIV_INTERN ibool lock_print_info_summary( /*====================*/ FILE* file, /*!< in: file where to print */ - ibool nowait);/*!< in: whether to wait for the kernel mutex */ -/************************************************************************* -Prints info of locks for each transaction. */ + ibool nowait) /*!< in: whether to wait for the lock mutex */ + __attribute__((nonnull, warn_unused_result)); +/*********************************************************************//** +Prints info of locks for each transaction. This function assumes that the +caller holds the lock mutex and more importantly it will release the lock +mutex on behalf of the caller. (This should be fixed in the future). 
*/ UNIV_INTERN void lock_print_info_all_transactions( @@ -635,27 +619,14 @@ lock_print_info_all_transactions( /*********************************************************************//** Return approximate number or record locks (bits set in the bitmap) for this transaction. Since delete-marked records may be removed, the -record count will not be precise. */ +record count will not be precise. +The caller must be holding lock_sys->mutex. */ UNIV_INTERN ulint lock_number_of_rows_locked( /*=======================*/ - const trx_t* trx); /*!< in: transaction */ -/*******************************************************************//** -Check if a transaction holds any autoinc locks. -@return TRUE if the transaction holds any AUTOINC locks. */ -UNIV_INTERN -ibool -lock_trx_holds_autoinc_locks( -/*=========================*/ - const trx_t* trx); /*!< in: transaction */ -/*******************************************************************//** -Release all the transaction's autoinc locks. */ -UNIV_INTERN -void -lock_release_autoinc_locks( -/*=======================*/ - trx_t* trx); /*!< in/out: transaction */ + const trx_lock_t* trx_lock) /*!< in: transaction locks */ + __attribute__((nonnull, warn_unused_result)); /*******************************************************************//** Gets the type of a lock. Non-inline version for using outside of the @@ -751,7 +722,78 @@ ulint lock_rec_get_page_no( /*=================*/ const lock_t* lock); /*!< in: lock */ +/*******************************************************************//** +Check if there are any locks (table or rec) against table. +@return TRUE if locks exist */ +UNIV_INTERN +ibool +lock_table_has_locks( +/*=================*/ + const dict_table_t* table); /*!< in: check if there are any locks + held on records in this table or on the + table itself */ +/*********************************************************************//** +A thread which wakes up threads whose lock wait may have lasted too long. 
+@return a dummy parameter */ +extern "C" UNIV_INTERN +os_thread_ret_t +DECLARE_THREAD(lock_wait_timeout_thread)( +/*=====================================*/ + void* arg); /*!< in: a dummy parameter required by + os_thread_create */ + +/********************************************************************//** +Releases a user OS thread waiting for a lock to be released, if the +thread is already suspended. */ +UNIV_INTERN +void +lock_wait_release_thread_if_suspended( +/*==================================*/ + que_thr_t* thr); /*!< in: query thread associated with the + user OS thread */ + +/***************************************************************//** +Puts a user OS thread to wait for a lock to be released. If an error +occurs during the wait trx->error_state associated with thr is +!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK +are possible errors. DB_DEADLOCK is returned if selective deadlock +resolution chose this transaction as a victim. */ +UNIV_INTERN +void +lock_wait_suspend_thread( +/*=====================*/ + que_thr_t* thr); /*!< in: query thread associated with the + user OS thread */ +/*********************************************************************//** +Unlocks AUTO_INC type locks that were possibly reserved by a trx. This +function should be called at the end of an SQL statement, by the +connection thread that owns the transaction (trx->mysql_thd). */ +UNIV_INTERN +void +lock_unlock_table_autoinc( +/*======================*/ + trx_t* trx); /*!< in/out: transaction */ +/*********************************************************************//** +Check whether the transaction has already been rolled back because it +was selected as a deadlock victim, or if it has to wait then cancel +the wait lock. 
+@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */ +UNIV_INTERN +enum db_err +lock_trx_handle_wait( +/*=================*/ + trx_t* trx) /*!< in/out: trx lock state */ + __attribute__((nonnull)); +/*********************************************************************//** +Get the number of locks on a table. +@return number of locks */ +UNIV_INTERN +ulint +lock_table_get_n_locks( +/*===================*/ + const dict_table_t* table) /*!< in: table */ + __attribute__((nonnull)); /** Lock modes and types */ /* @{ */ #define LOCK_MODE_MASK 0xFUL /*!< mask used to extract mode from the @@ -815,12 +857,66 @@ struct lock_op_struct{ /** The lock system struct */ struct lock_sys_struct{ - hash_table_t* rec_hash; /*!< hash table of the record locks */ + mutex_t mutex; /*!< Mutex protecting the + locks */ + hash_table_t* rec_hash; /*!< hash table of the record + locks */ + mutex_t wait_mutex; /*!< Mutex protecting the + next two fields */ + srv_slot_t* waiting_threads; /*!< Array of user threads + suspended while waiting for + locks within InnoDB, protected + by the lock_sys->wait_mutex */ + srv_slot_t* last_slot; /*!< highest slot ever used + in the waiting_threads array, + protected by + lock_sys->wait_mutex */ + ibool rollback_complete; + /*!< TRUE if rollback of all + recovered transactions is + complete. Protected by + lock_sys->mutex */ }; /** The lock system */ extern lock_sys_t* lock_sys; +/** Test if lock_sys->mutex can be acquired without waiting. */ +#define lock_mutex_enter_nowait() mutex_enter_nowait(&lock_sys->mutex) + +/** Test if lock_sys->mutex is owned. */ +#define lock_mutex_own() mutex_own(&lock_sys->mutex) + +/** Acquire the lock_sys->mutex. */ +#define lock_mutex_enter() do { \ + mutex_enter(&lock_sys->mutex); \ +} while (0) + +/** Release the lock_sys->mutex. */ +#define lock_mutex_exit() do { \ + mutex_exit(&lock_sys->mutex); \ +} while (0) + +/** Test if lock_sys->wait_mutex is owned. 
*/ +#define lock_wait_mutex_own() mutex_own(&lock_sys->wait_mutex) + +/** Acquire the lock_sys->wait_mutex. */ +#define lock_wait_mutex_enter() do { \ + mutex_enter(&lock_sys->wait_mutex); \ +} while (0) + +/** Release the lock_sys->wait_mutex. */ +#define lock_wait_mutex_exit() do { \ + mutex_exit(&lock_sys->wait_mutex); \ +} while (0) + +// FIXME: Move these to lock_sys_t +extern ibool srv_lock_timeout_active; +extern ulint srv_n_lock_wait_count; +extern ulint srv_n_lock_wait_current_count; +extern ib_int64_t srv_n_lock_wait_time; +extern ulint srv_n_lock_max_wait_time; +extern os_event_t srv_lock_timeout_thread_event; #ifndef UNIV_NONINL #include "lock0lock.ic" diff --git a/storage/innobase/include/lock0lock.ic b/storage/innobase/include/lock0lock.ic index 1d740a5fa43..736936954cb 100644 --- a/storage/innobase/include/lock0lock.ic +++ b/storage/innobase/include/lock0lock.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -68,35 +68,6 @@ lock_rec_hash( } /*********************************************************************//** -Checks if some transaction has an implicit x-lock on a record in a clustered -index. -@return transaction which has the x-lock, or NULL */ -UNIV_INLINE -trx_t* -lock_clust_rec_some_has_impl( -/*=========================*/ - const rec_t* rec, /*!< in: user record */ - const dict_index_t* index, /*!< in: clustered index */ - const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */ -{ - trx_id_t trx_id; - - ut_ad(mutex_own(&kernel_mutex)); - ut_ad(dict_index_is_clust(index)); - ut_ad(page_rec_is_user_rec(rec)); - - trx_id = row_get_rec_trx_id(rec, index, offsets); - - if (trx_is_active(trx_id)) { - /* The modifying or inserting transaction is active */ - - return(trx_get_on_id(trx_id)); - } - - return(NULL); -} - -/*********************************************************************//** Gets the heap_no of the smallest user record on a page. @return heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */ UNIV_INLINE diff --git a/storage/innobase/include/lock0priv.h b/storage/innobase/include/lock0priv.h index 287c151b19f..d516289e1f2 100644 --- a/storage/innobase/include/lock0priv.h +++ b/storage/innobase/include/lock0priv.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -62,7 +62,7 @@ struct lock_rec_struct { lock struct */ }; -/** Lock struct */ +/** Lock struct; protected by lock_sys->mutex */ struct lock_struct { trx_t* trx; /*!< transaction owning the lock */ @@ -101,6 +101,28 @@ lock_rec_get_prev( const lock_t* in_lock,/*!< in: record lock */ ulint heap_no);/*!< in: heap number of the record */ +/*********************************************************************//** +Cancels a waiting lock request and releases possible other transactions +waiting behind it. */ +UNIV_INTERN +void +lock_cancel_waiting_and_release( +/*============================*/ + lock_t* lock); /*!< in/out: waiting lock request */ + +/*********************************************************************//** +Checks if some transaction has an implicit x-lock on a record in a clustered +index. 
+@return transaction id of the transaction which has the x-lock, or 0 */ +UNIV_INLINE +trx_id_t +lock_clust_rec_some_has_impl( +/*=========================*/ + const rec_t* rec, /*!< in: user record */ + const dict_index_t* index, /*!< in: clustered index */ + const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */ + __attribute__((nonnull, warn_unused_result)); + #ifndef UNIV_NONINL #include "lock0priv.ic" #endif diff --git a/storage/innobase/include/lock0priv.ic b/storage/innobase/include/lock0priv.ic index 30447c99848..6b70dc33d3c 100644 --- a/storage/innobase/include/lock0priv.ic +++ b/storage/innobase/include/lock0priv.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2007, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -24,8 +24,8 @@ Created July 16, 2007 Vasil Dimov *******************************************************/ /* This file contains only methods which are used in -lock/lock0* files, other than lock/lock0lock.c. -I.e. lock/lock0lock.c contains more internal inline +lock/lock0* files, other than lock/lock0lock.cc. +I.e. 
lock/lock0lock.cc contains more internal inline methods but they are used only in that file. */ #ifndef LOCK_MODULE_IMPLEMENTATION @@ -46,4 +46,22 @@ lock_get_type_low( return(lock->type_mode & LOCK_TYPE_MASK); } +/*********************************************************************//** +Checks if some transaction has an implicit x-lock on a record in a clustered +index. +@return transaction id of the transaction which has the x-lock, or 0 */ +UNIV_INLINE +trx_id_t +lock_clust_rec_some_has_impl( +/*=========================*/ + const rec_t* rec, /*!< in: user record */ + const dict_index_t* index, /*!< in: clustered index */ + const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */ +{ + ut_ad(dict_index_is_clust(index)); + ut_ad(page_rec_is_user_rec(rec)); + + return(row_get_rec_trx_id(rec, index, offsets)); +} + /* vim: set filetype=c: */ diff --git a/storage/innobase/include/lock0types.h b/storage/innobase/include/lock0types.h index 45f29e90fe9..16e6b2e0113 100644 --- a/storage/innobase/include/lock0types.h +++ b/storage/innobase/include/lock0types.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -39,7 +39,9 @@ enum lock_mode { LOCK_AUTO_INC, /* locks the auto-inc counter of a table in an exclusive mode */ LOCK_NONE, /* this is used elsewhere to note consistent read */ - LOCK_NUM = LOCK_NONE/* number of lock modes */ + LOCK_NUM = LOCK_NONE, /* number of lock modes */ + LOCK_NONE_UNSET = 255 }; + #endif diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index f2ab6a9898d..6d27d9d4f10 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2010, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2009, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -41,6 +41,12 @@ Created 12/9/1995 Heikki Tuuri #include "sync0rw.h" #endif /* !UNIV_HOTBACKUP */ +/* Type used for all log sequence number storage and arithmetics */ +typedef ib_uint64_t lsn_t; +#define LSN_MAX IB_UINT64_MAX + +#define LSN_PF UINT64PF + /** Redo log buffer */ typedef struct log_struct log_t; /** Redo log group */ @@ -64,17 +70,6 @@ extern ibool log_debug_writes; /** Maximum number of log groups in log_group_struct::checkpoint_buf */ #define LOG_MAX_N_GROUPS 32 -#ifndef UNIV_HOTBACKUP -/****************************************************************//** -Sets the global variable log_fsp_current_free_limit. 
Also makes a checkpoint, -so that we know that the limit has been written to a log checkpoint field -on disk. */ -UNIV_INTERN -void -log_fsp_current_free_limit_set_and_checkpoint( -/*==========================================*/ - ulint limit); /*!< in: limit to set */ -#endif /* !UNIV_HOTBACKUP */ /*******************************************************************//** Calculates where in log files we find a specified lsn. @return log file number */ @@ -98,12 +93,12 @@ Writes to the log the string given. The log must be released with log_release. @return end lsn of the log record, zero if did not succeed */ UNIV_INLINE -ib_uint64_t +lsn_t log_reserve_and_write_fast( /*=======================*/ const void* str, /*!< in: string */ ulint len, /*!< in: string length */ - ib_uint64_t* start_lsn);/*!< out: start lsn of the log record */ + lsn_t* start_lsn);/*!< out: start lsn of the log record */ /***********************************************************************//** Releases the log mutex. */ UNIV_INLINE @@ -124,7 +119,7 @@ Opens the log for log_write_low. The log must be closed with log_close and released with log_release. @return start lsn of the log record */ UNIV_INTERN -ib_uint64_t +lsn_t log_reserve_and_open( /*=================*/ ulint len); /*!< in: length of data to be catenated */ @@ -141,14 +136,14 @@ log_write_low( Closes the log. @return lsn */ UNIV_INTERN -ib_uint64_t +lsn_t log_close(void); /*===========*/ /************************************************************//** Gets the current lsn. @return current lsn */ UNIV_INLINE -ib_uint64_t +lsn_t log_get_lsn(void); /*=============*/ /**************************************************************** @@ -156,7 +151,7 @@ Gets the log group capacity. It is OK to read the value without holding log_sys->mutex because it is constant. 
@return log group capacity */ UNIV_INLINE -ulint +lsn_t log_get_capacity(void); /*==================*/ /******************************************************//** @@ -173,7 +168,7 @@ log_group_init( /*===========*/ ulint id, /*!< in: group id */ ulint n_files, /*!< in: number of log files */ - ulint file_size, /*!< in: log file size in bytes */ + lsn_t file_size, /*!< in: log file size in bytes */ ulint space_id, /*!< in: space id of the file space which contains the log files of this group */ @@ -198,14 +193,13 @@ UNIV_INTERN void log_write_up_to( /*============*/ - ib_uint64_t lsn, /*!< in: log sequence number up to which - the log should be written, - IB_ULONGLONG_MAX if not specified */ - ulint wait, /*!< in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP, - or LOG_WAIT_ALL_GROUPS */ - ibool flush_to_disk); - /*!< in: TRUE if we want the written log - also to be flushed to disk */ + lsn_t lsn, /*!< in: log sequence number up to which + the log should be written, LSN_MAX if not specified */ + ulint wait, /*!< in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP, + or LOG_WAIT_ALL_GROUPS */ + ibool flush_to_disk); + /*!< in: TRUE if we want the written log + also to be flushed to disk */ /****************************************************************//** Does a syncronous flush of the log buffer to disk. */ UNIV_INTERN @@ -223,20 +217,14 @@ log_buffer_sync_in_background( /*==========================*/ ibool flush); /*<! in: flush the logs to disk */ /****************************************************************//** -Advances the smallest lsn for which there are unflushed dirty blocks in the -buffer pool and also may make a new checkpoint. NOTE: this function may only -be called if the calling thread owns no synchronization objects! 
-@return FALSE if there was a flush batch of the same type running, -which means that we could not start this flush batch */ -UNIV_INTERN -ibool -log_preflush_pool_modified_pages( -/*=============================*/ - ib_uint64_t new_oldest, /*!< in: try to advance - oldest_modified_lsn at least - to this lsn */ - ibool sync); /*!< in: TRUE if synchronous - operation is desired */ +Checks if an asynchronous flushing of dirty pages is required in the +background. This function is only called from the page cleaner thread. +@return lsn to which the flushing should happen or LSN_MAX +if flushing is not required */ +UNIV_INTERN +lsn_t +log_async_flush_lsn(void); +/*=====================*/ /******************************************************//** Makes a checkpoint. Note that this function does not flush dirty blocks from the buffer pool: it only checks what is lsn of the oldest @@ -261,16 +249,16 @@ UNIV_INTERN void log_make_checkpoint_at( /*===================*/ - ib_uint64_t lsn, /*!< in: make a checkpoint at this or a - later lsn, if IB_ULONGLONG_MAX, makes - a checkpoint at the latest lsn */ - ibool write_always); /*!< in: the function normally checks if - the new checkpoint would have a - greater lsn than the previous one: if - not, then no physical write is done; - by setting this parameter TRUE, a - physical write will always be made to - log files */ + lsn_t lsn, /*!< in: make a checkpoint at this or a + later lsn, if LSN_MAX, makes + a checkpoint at the latest lsn */ + ibool write_always); /*!< in: the function normally checks if + the new checkpoint would have a + greater lsn than the previous one: if + not, then no physical write is done; + by setting this parameter TRUE, a + physical write will always be made to + log files */ /****************************************************************//** Makes a checkpoint at the latest lsn and writes it to first page of each data file in the database, so that we know that the file spaces contain @@ -388,8 +376,8 @@ 
log_group_read_log_seg( ulint type, /*!< in: LOG_ARCHIVE or LOG_RECOVER */ byte* buf, /*!< in: buffer where to read */ log_group_t* group, /*!< in: log group */ - ib_uint64_t start_lsn, /*!< in: read area start */ - ib_uint64_t end_lsn); /*!< in: read area end */ + lsn_t start_lsn, /*!< in: read area start */ + lsn_t end_lsn); /*!< in: read area end */ /******************************************************//** Writes a buffer to a log file group. */ UNIV_INTERN @@ -400,7 +388,7 @@ log_group_write_buf( byte* buf, /*!< in: buffer */ ulint len, /*!< in: buffer len; must be divisible by OS_FILE_LOG_BLOCK_SIZE */ - ib_uint64_t start_lsn, /*!< in: start lsn of the buffer; must + lsn_t start_lsn, /*!< in: start lsn of the buffer; must be divisible by OS_FILE_LOG_BLOCK_SIZE */ ulint new_data_offset);/*!< in: start offset of new data in @@ -416,14 +404,14 @@ void log_group_set_fields( /*=================*/ log_group_t* group, /*!< in/out: group */ - ib_uint64_t lsn); /*!< in: lsn for which the values should be + lsn_t lsn); /*!< in: lsn for which the values should be set */ /******************************************************//** Calculates the data capacity of a log group, when the log file headers are not included. @return capacity in bytes */ UNIV_INTERN -ulint +lsn_t log_group_get_capacity( /*===================*/ const log_group_t* group); /*!< in: log group */ @@ -515,8 +503,8 @@ UNIV_INLINE void log_block_init( /*===========*/ - byte* log_block, /*!< in: pointer to the log buffer */ - ib_uint64_t lsn); /*!< in: lsn within the log block */ + byte* log_block, /*!< in: pointer to the log buffer */ + lsn_t lsn); /*!< in: lsn within the log block */ /************************************************************//** Initializes a log block in the log buffer in the old, < 3.23.52 format, where there was no checksum yet. 
*/ @@ -524,8 +512,8 @@ UNIV_INLINE void log_block_init_in_old_format( /*=========================*/ - byte* log_block, /*!< in: pointer to the log buffer */ - ib_uint64_t lsn); /*!< in: lsn within the log block */ + byte* log_block, /*!< in: pointer to the log buffer */ + lsn_t lsn); /*!< in: lsn within the log block */ /************************************************************//** Converts a lsn to a log block number. @return log block number, it is > 0 and <= 1G */ @@ -533,7 +521,7 @@ UNIV_INLINE ulint log_block_convert_lsn_to_no( /*========================*/ - ib_uint64_t lsn); /*!< in: lsn of a byte within the block */ + lsn_t lsn); /*!< in: lsn of a byte within the block */ /******************************************************//** Prints info of the log. */ UNIV_INTERN @@ -548,7 +536,7 @@ UNIV_INTERN ibool log_peek_lsn( /*=========*/ - ib_uint64_t* lsn); /*!< out: if returns TRUE, current lsn is here */ + lsn_t* lsn); /*!< out: if returns TRUE, current lsn is here */ /**********************************************************************//** Refreshes the statistics used to print per-second averages. 
*/ UNIV_INTERN @@ -579,7 +567,7 @@ extern log_t* log_sys; #define LOG_RECOVER 98887331 /* The counting of lsn's starts from this value: this must be non-zero */ -#define LOG_START_LSN ((ib_uint64_t) (16 * OS_FILE_LOG_BLOCK_SIZE)) +#define LOG_START_LSN ((lsn_t) (16 * OS_FILE_LOG_BLOCK_SIZE)) #define LOG_BUFFER_SIZE (srv_log_buffer_size * UNIV_PAGE_SIZE) #define LOG_ARCHIVE_BUF_SIZE (srv_log_buffer_size * UNIV_PAGE_SIZE / 4) @@ -626,7 +614,7 @@ extern log_t* log_sys; /* Offsets for a checkpoint field */ #define LOG_CHECKPOINT_NO 0 #define LOG_CHECKPOINT_LSN 8 -#define LOG_CHECKPOINT_OFFSET 16 +#define LOG_CHECKPOINT_OFFSET_LOW32 16 #define LOG_CHECKPOINT_LOG_BUF_SIZE 20 #define LOG_CHECKPOINT_ARCHIVED_LSN 24 #define LOG_CHECKPOINT_GROUP_ARRAY 32 @@ -640,22 +628,38 @@ extern log_t* log_sys; + LOG_MAX_N_GROUPS * 8) #define LOG_CHECKPOINT_CHECKSUM_1 LOG_CHECKPOINT_ARRAY_END #define LOG_CHECKPOINT_CHECKSUM_2 (4 + LOG_CHECKPOINT_ARRAY_END) +#if 0 #define LOG_CHECKPOINT_FSP_FREE_LIMIT (8 + LOG_CHECKPOINT_ARRAY_END) - /* current fsp free limit in + /*!< Not used (0); + This used to contain the + current fsp free limit in tablespace 0, in units of one - megabyte; this information is only used - by ibbackup to decide if it can - truncate unused ends of - non-auto-extending data files in space - 0 */ + megabyte. + + This information might have been used + since ibbackup version 0.35 but + before 1.41 to decide if unused ends of + non-auto-extending data files + in space 0 can be truncated. + + This information was made obsolete + by ibbackup --compress. 
*/ #define LOG_CHECKPOINT_FSP_MAGIC_N (12 + LOG_CHECKPOINT_ARRAY_END) - /* this magic number tells if the + /*!< Not used (0); + This magic number tells if the checkpoint contains the above field: the field was added to - InnoDB-3.23.50 */ -#define LOG_CHECKPOINT_SIZE (16 + LOG_CHECKPOINT_ARRAY_END) - + InnoDB-3.23.50 and + removed from MySQL 5.6 */ #define LOG_CHECKPOINT_FSP_MAGIC_N_VAL 1441231243 + /*!< if LOG_CHECKPOINT_FSP_MAGIC_N + contains this value, then + LOG_CHECKPOINT_FSP_FREE_LIMIT + is valid */ +#endif +#define LOG_CHECKPOINT_OFFSET_HIGH32 (16 + LOG_CHECKPOINT_ARRAY_END) +#define LOG_CHECKPOINT_SIZE (20 + LOG_CHECKPOINT_ARRAY_END) + /* Offsets of a log file header */ #define LOG_GROUP_ID 0 /* log group number */ @@ -705,15 +709,15 @@ struct log_group_struct{ /* The following fields are protected by log_sys->mutex */ ulint id; /*!< log group id */ ulint n_files; /*!< number of files in the group */ - ulint file_size; /*!< individual log file size in bytes, + lsn_t file_size; /*!< individual log file size in bytes, including the log file header */ ulint space_id; /*!< file space which implements the log group */ ulint state; /*!< LOG_GROUP_OK or LOG_GROUP_CORRUPTED */ - ib_uint64_t lsn; /*!< lsn used to fix coordinates within + lsn_t lsn; /*!< lsn used to fix coordinates within the log group */ - ulint lsn_offset; /*!< the offset of the above lsn */ + lsn_t lsn_offset; /*!< the offset of the above lsn */ ulint n_pending_writes;/*!< number of currently pending flush writes for this log group */ byte** file_header_bufs_ptr;/*!< unaligned buffers */ @@ -742,7 +746,7 @@ struct log_group_struct{ ulint next_archived_offset; /*!< like the preceding field */ #endif /* UNIV_LOG_ARCHIVE */ /*-----------------------------*/ - ib_uint64_t scanned_lsn; /*!< used only in recovery: recovery scan + lsn_t scanned_lsn; /*!< used only in recovery: recovery scan succeeded up to this lsn in this log group */ byte* checkpoint_buf_ptr;/*!< unaligned checkpoint header */ @@ 
-757,12 +761,11 @@ struct log_struct{ byte pad[64]; /*!< padding to prevent other memory update hotspots from residing on the same memory cache line */ - ib_uint64_t lsn; /*!< log sequence number */ + lsn_t lsn; /*!< log sequence number */ ulint buf_free; /*!< first free offset within the log buffer */ #ifndef UNIV_HOTBACKUP mutex_t mutex; /*!< mutex protecting the log */ -#endif /* !UNIV_HOTBACKUP */ mutex_t log_flush_order_mutex;/*!< mutex to serialize access to the flush list when we are putting @@ -772,6 +775,7 @@ struct log_struct{ mtr_commit and still ensure that insertions in the flush_list happen in the LSN order. */ +#endif /* !UNIV_HOTBACKUP */ byte* buf_ptr; /* unaligned log buffer */ byte* buf; /*!< log buffer */ ulint buf_size; /*!< log buffer size in bytes */ @@ -806,13 +810,13 @@ struct log_struct{ later; this is advanced when a flush operation is completed to all the log groups */ - ib_uint64_t written_to_some_lsn; + lsn_t written_to_some_lsn; /*!< first log sequence number not yet written to any log group; for this to be advanced, it is enough that the write i/o has been completed for any one log group */ - ib_uint64_t written_to_all_lsn; + lsn_t written_to_all_lsn; /*!< first log sequence number not yet written to some log group; for this to be advanced, it is enough that the @@ -828,16 +832,16 @@ struct log_struct{ flushed_to_disk_lsn or write_lsn which are always up-to-date and accurate. 
*/ - ib_uint64_t write_lsn; /*!< end lsn for the current running + lsn_t write_lsn; /*!< end lsn for the current running write */ ulint write_end_offset;/*!< the data in buffer has been written up to this offset when the current write ends: this field will then be copied to buf_next_to_write */ - ib_uint64_t current_flush_lsn;/*!< end lsn for the current running + lsn_t current_flush_lsn;/*!< end lsn for the current running write + flush operation */ - ib_uint64_t flushed_to_disk_lsn; + lsn_t flushed_to_disk_lsn; /*!< how far we have written the log AND flushed to disk */ ulint n_pending_writes;/*!< number of currently @@ -874,42 +878,37 @@ struct log_struct{ /* @} */ /** Fields involved in checkpoints @{ */ - ulint log_group_capacity; /*!< capacity of the log group; if + lsn_t log_group_capacity; /*!< capacity of the log group; if the checkpoint age exceeds this, it is a serious error because it is possible we will then overwrite log and spoil crash recovery */ - ulint max_modified_age_async; + lsn_t max_modified_age_async; /*!< when this recommended value for lsn - buf_pool_get_oldest_modification() is exceeded, we start an asynchronous preflush of pool pages */ - ulint max_modified_age_sync; + lsn_t max_modified_age_sync; /*!< when this recommended value for lsn - buf_pool_get_oldest_modification() is exceeded, we start a synchronous preflush of pool pages */ - ulint adm_checkpoint_interval; - /*!< administrator-specified checkpoint - interval in terms of log growth in - bytes; the interval actually used by - the database can be smaller */ - ulint max_checkpoint_age_async; + lsn_t max_checkpoint_age_async; /*!< when this checkpoint age is exceeded we start an asynchronous writing of a new checkpoint */ - ulint max_checkpoint_age; + lsn_t max_checkpoint_age; /*!< this is the maximum allowed value for lsn - last_checkpoint_lsn when a new query step is started */ ib_uint64_t next_checkpoint_no; /*!< next checkpoint number */ - ib_uint64_t last_checkpoint_lsn; + 
lsn_t last_checkpoint_lsn; /*!< latest checkpoint lsn */ - ib_uint64_t next_checkpoint_lsn; + lsn_t next_checkpoint_lsn; /*!< next checkpoint lsn */ ulint n_pending_checkpoint_writes; /*!< number of currently pending @@ -927,16 +926,16 @@ struct log_struct{ /** Fields involved in archiving @{ */ ulint archiving_state;/*!< LOG_ARCH_ON, LOG_ARCH_STOPPING LOG_ARCH_STOPPED, LOG_ARCH_OFF */ - ib_uint64_t archived_lsn; /*!< archiving has advanced to this + lsn_t archived_lsn; /*!< archiving has advanced to this lsn */ - ulint max_archived_lsn_age_async; + lsn_t max_archived_lsn_age_async; /*!< recommended maximum age of archived_lsn, before we start asynchronous copying to the archive */ - ulint max_archived_lsn_age; + lsn_t max_archived_lsn_age; /*!< maximum allowed age for archived_lsn */ - ib_uint64_t next_archived_lsn;/*!< during an archive write, + lsn_t next_archived_lsn;/*!< during an archive write, until the write is completed, we store the next value for archived_lsn here: the write diff --git a/storage/innobase/include/log0log.ic b/storage/innobase/include/log0log.ic index 67db6695cab..5ecd7b85a26 100644 --- a/storage/innobase/include/log0log.ic +++ b/storage/innobase/include/log0log.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2010, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -26,6 +26,7 @@ Created 12/9/1995 Heikki Tuuri #include "os0file.h" #include "mach0data.h" #include "mtr0mtr.h" +#include "srv0mon.h" #ifdef UNIV_LOG_DEBUG /******************************************************//** @@ -192,7 +193,7 @@ UNIV_INLINE ulint log_block_convert_lsn_to_no( /*========================*/ - ib_uint64_t lsn) /*!< in: lsn of a byte within the block */ + lsn_t lsn) /*!< in: lsn of a byte within the block */ { return(((ulint) (lsn / OS_FILE_LOG_BLOCK_SIZE) & 0x3FFFFFFFUL) + 1); } @@ -260,8 +261,8 @@ UNIV_INLINE void log_block_init( /*===========*/ - byte* log_block, /*!< in: pointer to the log buffer */ - ib_uint64_t lsn) /*!< in: lsn within the log block */ + byte* log_block, /*!< in: pointer to the log buffer */ + lsn_t lsn) /*!< in: lsn within the log block */ { ulint no; @@ -282,8 +283,8 @@ UNIV_INLINE void log_block_init_in_old_format( /*=========================*/ - byte* log_block, /*!< in: pointer to the log buffer */ - ib_uint64_t lsn) /*!< in: lsn within the log block */ + byte* log_block, /*!< in: pointer to the log buffer */ + lsn_t lsn) /*!< in: lsn within the log block */ { ulint no; @@ -304,12 +305,12 @@ Writes to the log the string given. The log must be released with log_release. 
@return end lsn of the log record, zero if did not succeed */ UNIV_INLINE -ib_uint64_t +lsn_t log_reserve_and_write_fast( /*=======================*/ const void* str, /*!< in: string */ ulint len, /*!< in: string length */ - ib_uint64_t* start_lsn)/*!< out: start lsn of the log record */ + lsn_t* start_lsn)/*!< out: start lsn of the log record */ { ulint data_len; #ifdef UNIV_LOG_LSN_DEBUG @@ -374,6 +375,9 @@ log_reserve_and_write_fast( log_sys->lsn += len; + MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE, + log_sys->lsn - log_sys->last_checkpoint_lsn); + #ifdef UNIV_LOG_DEBUG log_check_log_recs(log_sys->buf + log_sys->old_buf_free, log_sys->buf_free - log_sys->old_buf_free, @@ -396,11 +400,11 @@ log_release(void) Gets the current lsn. @return current lsn */ UNIV_INLINE -ib_uint64_t +lsn_t log_get_lsn(void) /*=============*/ { - ib_uint64_t lsn; + lsn_t lsn; mutex_enter(&(log_sys->mutex)); @@ -416,7 +420,7 @@ Gets the log group capacity. It is OK to read the value without holding log_sys->mutex because it is constant. @return log group capacity */ UNIV_INLINE -ulint +lsn_t log_get_capacity(void) /*==================*/ { diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h index 9f334a34b44..218298a1698 100644 --- a/storage/innobase/include/log0recv.h +++ b/storage/innobase/include/log0recv.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2010, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -40,19 +40,17 @@ Reads the checkpoint info needed in hot backup. @return TRUE if success */ UNIV_INTERN ibool -recv_read_cp_info_for_backup( -/*=========================*/ +recv_read_checkpoint_info_for_backup( +/*=================================*/ const byte* hdr, /*!< in: buffer containing the log group header */ - ib_uint64_t* lsn, /*!< out: checkpoint lsn */ - ulint* offset, /*!< out: checkpoint offset in the log group */ - ulint* fsp_limit,/*!< out: fsp limit of space 0, - 1000000000 if the database is running - with < version 3.23.50 of InnoDB */ - ib_uint64_t* cp_no, /*!< out: checkpoint number */ - ib_uint64_t* first_header_lsn); + lsn_t* lsn, /*!< out: checkpoint lsn */ + lsn_t* offset, /*!< out: checkpoint offset in the log group */ + lsn_t* cp_no, /*!< out: checkpoint number */ + lsn_t* first_header_lsn) /*!< out: lsn of of the start of the first log file */ + __attribute__((nonnull)); /*******************************************************************//** Scans the log segment and n_bytes_scanned is set to the length of valid log scanned. 
*/ @@ -62,7 +60,7 @@ recv_scan_log_seg_for_backup( /*=========================*/ byte* buf, /*!< in: buffer containing log data */ ulint buf_len, /*!< in: data length in that buffer */ - ib_uint64_t* scanned_lsn, /*!< in/out: lsn of buffer start, + lsn_t* scanned_lsn, /*!< in/out: lsn of buffer start, we return scanned lsn */ ulint* scanned_checkpoint_no, /*!< in/out: 4 lowest bytes of the @@ -136,12 +134,12 @@ recv_recovery_from_checkpoint_start_func( #ifdef UNIV_LOG_ARCHIVE ulint type, /*!< in: LOG_CHECKPOINT or LOG_ARCHIVE */ - ib_uint64_t limit_lsn, /*!< in: recover up to this lsn + lsn_t limit_lsn, /*!< in: recover up to this lsn if possible */ #endif /* UNIV_LOG_ARCHIVE */ - ib_uint64_t min_flushed_lsn,/*!< in: min flushed lsn from + lsn_t min_flushed_lsn,/*!< in: min flushed lsn from data files */ - ib_uint64_t max_flushed_lsn);/*!< in: max flushed lsn from + lsn_t max_flushed_lsn);/*!< in: max flushed lsn from data files */ #ifdef UNIV_LOG_ARCHIVE /** Wrapper for recv_recovery_from_checkpoint_start_func(). @@ -202,11 +200,11 @@ recv_scan_log_recs( const byte* buf, /*!< in: buffer containing a log segment or garbage */ ulint len, /*!< in: buffer length */ - ib_uint64_t start_lsn, /*!< in: buffer start lsn */ - ib_uint64_t* contiguous_lsn, /*!< in/out: it is known that all log + lsn_t start_lsn, /*!< in: buffer start lsn */ + lsn_t* contiguous_lsn, /*!< in/out: it is known that all log groups contain contiguous log data up to this lsn */ - ib_uint64_t* group_scanned_lsn);/*!< out: scanning succeeded up to + lsn_t* group_scanned_lsn);/*!< out: scanning succeeded up to this lsn */ /******************************************************//** Resets the logs. The contents of log files will be lost! 
*/ @@ -214,7 +212,7 @@ UNIV_INTERN void recv_reset_logs( /*============*/ - ib_uint64_t lsn, /*!< in: reset to this lsn + lsn_t lsn, /*!< in: reset to this lsn rounded up to be divisible by OS_FILE_LOG_BLOCK_SIZE, after which we add @@ -235,8 +233,8 @@ recv_reset_log_files_for_backup( /*============================*/ const char* log_dir, /*!< in: log file directory path */ ulint n_log_files, /*!< in: number of log files */ - ulint log_file_size, /*!< in: log file size */ - ib_uint64_t lsn); /*!< in: new start lsn, must be + lsn_t log_file_size, /*!< in: log file size */ + lsn_t lsn); /*!< in: new start lsn, must be divisible by OS_FILE_LOG_BLOCK_SIZE */ #endif /* UNIV_HOTBACKUP */ /********************************************************//** @@ -302,9 +300,9 @@ UNIV_INTERN ulint recv_recovery_from_archive_start( /*=============================*/ - ib_uint64_t min_flushed_lsn,/*!< in: min flushed lsn field from the + lsn_t min_flushed_lsn,/*!< in: min flushed lsn field from the data files */ - ib_uint64_t limit_lsn, /*!< in: recover up to this lsn if + lsn_t limit_lsn, /*!< in: recover up to this lsn if possible */ ulint first_log_no); /*!< in: number of the first archived log file to use in the recovery; the @@ -337,11 +335,11 @@ struct recv_struct{ ulint len; /*!< log record body length in bytes */ recv_data_t* data; /*!< chain of blocks containing the log record body */ - ib_uint64_t start_lsn;/*!< start lsn of the log segment written by + lsn_t start_lsn;/*!< start lsn of the log segment written by the mtr which generated this log record: NOTE that this is not necessarily the start lsn of this log record */ - ib_uint64_t end_lsn;/*!< end lsn of the log segment written by + lsn_t end_lsn;/*!< end lsn of the log segment written by the mtr which generated this log record: NOTE that this is not necessarily the end lsn of this log record */ @@ -392,7 +390,7 @@ struct recv_sys_struct{ ibool apply_batch_on; /*!< this is TRUE when a log rec application batch is running 
*/ - ib_uint64_t lsn; /*!< log sequence number */ + lsn_t lsn; /*!< log sequence number */ ulint last_log_buf_size; /*!< size of the log buffer when the database last time wrote to the log */ @@ -404,12 +402,12 @@ struct recv_sys_struct{ preceding buffer */ byte* buf; /*!< buffer for parsing log records */ ulint len; /*!< amount of data in buf */ - ib_uint64_t parse_start_lsn; + lsn_t parse_start_lsn; /*!< this is the lsn from which we were able to start parsing log records and adding them to the hash table; zero if a suitable start point not found yet */ - ib_uint64_t scanned_lsn; + lsn_t scanned_lsn; /*!< the log data has been scanned up to this lsn */ ulint scanned_checkpoint_no; @@ -418,10 +416,10 @@ struct recv_sys_struct{ ulint recovered_offset; /*!< start offset of non-parsed log records in buf */ - ib_uint64_t recovered_lsn; + lsn_t recovered_lsn; /*!< the log records have been parsed up to this lsn */ - ib_uint64_t limit_lsn;/*!< recovery should be made at most + lsn_t limit_lsn;/*!< recovery should be made at most up to this lsn */ ibool found_corrupt_log; /*!< this is set to TRUE if we during log diff --git a/storage/innobase/include/log0recv.ic b/storage/innobase/include/log0recv.ic index 0a8e55b96fa..32c28dd03e6 100644 --- a/storage/innobase/include/log0recv.ic +++ b/storage/innobase/include/log0recv.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -33,7 +33,7 @@ ibool recv_recovery_is_on(void) /*=====================*/ { - return(UNIV_UNLIKELY(recv_recovery_on)); + return(recv_recovery_on); } #ifdef UNIV_LOG_ARCHIVE diff --git a/storage/innobase/include/mach0data.h b/storage/innobase/include/mach0data.h index 8434bc73586..3066070ef39 100644 --- a/storage/innobase/include/mach0data.h +++ b/storage/innobase/include/mach0data.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -27,6 +27,8 @@ Created 11/28/1995 Heikki Tuuri #ifndef mach0data_h #define mach0data_h +#ifndef UNIV_INNOCHECKSUM + #include "univ.i" #include "ut0byte.h" @@ -204,7 +206,7 @@ UNIV_INLINE void mach_write_to_8( /*============*/ - byte* b, /*!< in: pointer to 8 bytes where to store */ + void* b, /*!< in: pointer to 8 bytes where to store */ ib_uint64_t n); /*!< in: 64-bit integer to be stored */ /********************************************************//** The following function is used to fetch data from 8 consecutive @@ -361,19 +363,19 @@ mach_write_to_2_little_endian( /*==========================*/ byte* dest, /*!< in: where to write */ ulint n); /*!< in: unsigned long int to write */ - /*********************************************************//** Convert integral type from storage byte order (big endian) to host byte order. @return integer value */ UNIV_INLINE -ullint +ib_uint64_t mach_read_int_type( /*===============*/ const byte* src, /*!< in: where to read from */ ulint len, /*!< in: length of src */ ibool unsigned_type); /*!< in: signed or unsigned flag */ #endif /* !UNIV_HOTBACKUP */ +#endif /* !UNIV_INNOCHECKSUM */ #ifndef UNIV_NONINL #include "mach0data.ic" diff --git a/storage/innobase/include/mach0data.ic b/storage/innobase/include/mach0data.ic index b1e5991d39e..ec1a28bca47 100644 --- a/storage/innobase/include/mach0data.ic +++ b/storage/innobase/include/mach0data.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. 
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -24,6 +24,8 @@ to the machine format. Created 11/28/1995 Heikki Tuuri ***********************************************************************/ +#ifndef UNIV_INNOCHECKSUM + #include "ut0mem.h" /*******************************************************//** @@ -38,7 +40,7 @@ mach_write_to_1( ut_ad(b); ut_ad((n | 0xFFUL) <= 0xFFUL); - b[0] = (byte)n; + b[0] = (byte) n; } /********************************************************//** @@ -165,9 +167,11 @@ mach_write_to_4( b[0] = (byte)(n >> 24); b[1] = (byte)(n >> 16); b[2] = (byte)(n >> 8); - b[3] = (byte)n; + b[3] = (byte) n; } +#endif /* !UNIV_INNOCHECKSUM */ + /********************************************************//** The following function is used to fetch data from 4 consecutive bytes. The most significant byte is at the lowest address. @@ -186,6 +190,8 @@ mach_read_from_4( ); } +#ifndef UNIV_INNOCHECKSUM + /*********************************************************//** Writes a ulint in a compressed form where the first byte codes the length of the stored ulint. 
We look at the most significant bits of @@ -280,13 +286,13 @@ UNIV_INLINE void mach_write_to_8( /*============*/ - byte* b, /*!< in: pointer to 8 bytes where to store */ + void* b, /*!< in: pointer to 8 bytes where to store */ ib_uint64_t n) /*!< in: 64-bit integer to be stored */ { ut_ad(b); - mach_write_to_4(b, (ulint) (n >> 32)); - mach_write_to_4(b + 4, (ulint) n); + mach_write_to_4(static_cast<byte*>(b), (ulint) (n >> 32)); + mach_write_to_4(static_cast<byte*>(b) + 4, (ulint) n); } /********************************************************//** @@ -550,7 +556,7 @@ mach_double_read( ulint i; byte* ptr; - ptr = (byte*)&d; + ptr = (byte*) &d; for (i = 0; i < sizeof(double); i++) { #ifdef WORDS_BIGENDIAN @@ -575,7 +581,7 @@ mach_double_write( ulint i; byte* ptr; - ptr = (byte*)&d; + ptr = (byte*) &d; for (i = 0; i < sizeof(double); i++) { #ifdef WORDS_BIGENDIAN @@ -599,7 +605,7 @@ mach_float_read( ulint i; byte* ptr; - ptr = (byte*)&d; + ptr = (byte*) &d; for (i = 0; i < sizeof(float); i++) { #ifdef WORDS_BIGENDIAN @@ -624,7 +630,7 @@ mach_float_write( ulint i; byte* ptr; - ptr = (byte*)&d; + ptr = (byte*) &d; for (i = 0; i < sizeof(float); i++) { #ifdef WORDS_BIGENDIAN @@ -648,7 +654,6 @@ mach_read_from_n_little_endian( ulint n = 0; const byte* ptr; - ut_ad(buf_size <= sizeof(ulint)); ut_ad(buf_size > 0); ptr = buf + buf_size; @@ -736,7 +741,7 @@ Convert integral type from storage byte order (big endian) to host byte order. 
@return integer value */ UNIV_INLINE -ullint +ib_uint64_t mach_read_int_type( /*===============*/ const byte* src, /*!< in: where to read from */ @@ -772,3 +777,4 @@ mach_read_int_type( return(ret); } #endif /* !UNIV_HOTBACKUP */ +#endif /* !UNIV_INNOCHECKSUM */ diff --git a/storage/innobase/include/mem0dbg.h b/storage/innobase/include/mem0dbg.h index d81e1418b2b..9f95e84c81e 100644 --- a/storage/innobase/include/mem0dbg.h +++ b/storage/innobase/include/mem0dbg.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2010, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -31,7 +31,7 @@ check fields whose sizes are given below */ # ifndef UNIV_HOTBACKUP /* The mutex which protects in the debug version the hash table containing the list of live memory heaps, and also the global -variables in mem0dbg.c. */ +variables in mem0dbg.cc. 
*/ extern mutex_t mem_hash_mutex; # endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/include/mem0dbg.ic b/storage/innobase/include/mem0dbg.ic index b0c8178a623..ec60ed35337 100644 --- a/storage/innobase/include/mem0dbg.ic +++ b/storage/innobase/include/mem0dbg.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2010, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/mem0mem.h b/storage/innobase/include/mem0mem.h index 5181bb4c9f7..6851a5bc01b 100644 --- a/storage/innobase/include/mem0mem.h +++ b/storage/innobase/include/mem0mem.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2010, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -62,6 +62,12 @@ buffer pool; the latter method is used for very big heaps */ allocation functions can return NULL. */ +/* Different type of heaps in terms of which datastructure is using them */ +#define MEM_HEAP_FOR_BTR_SEARCH (MEM_HEAP_BTR_SEARCH | MEM_HEAP_BUFFER) +#define MEM_HEAP_FOR_PAGE_HASH (MEM_HEAP_DYNAMIC) +#define MEM_HEAP_FOR_RECV_SYS (MEM_HEAP_BUFFER) +#define MEM_HEAP_FOR_LOCK_HEAP (MEM_HEAP_BUFFER) + /* The following start size is used for the first block in the memory heap if the size is not specified, i.e., 0 is given as the parameter in the call of create. The standard size is the maximum (payload) size of the blocks used for @@ -99,16 +105,8 @@ heap creation. */ Use this macro instead of the corresponding function! Macro for memory heap creation. */ -#define mem_heap_create_in_buffer(N) mem_heap_create_func(\ - (N), MEM_HEAP_BUFFER, __FILE__, __LINE__) -/**************************************************************//** -Use this macro instead of the corresponding function! Macro for memory -heap creation. */ - -#define mem_heap_create_in_btr_search(N) mem_heap_create_func(\ - (N), MEM_HEAP_BTR_SEARCH | MEM_HEAP_BUFFER,\ - __FILE__, __LINE__) - +#define mem_heap_create_typed(N, T) mem_heap_create_func(\ + (N), (T), __FILE__, __LINE__) /**************************************************************//** Use this macro instead of the corresponding function! Macro for memory heap freeing. */ @@ -221,7 +219,7 @@ mem_heap_get_size( Use this macro instead of the corresponding function! 
Macro for memory buffer allocation */ -#define mem_zalloc(N) memset(mem_alloc(N), 0, (N)); +#define mem_zalloc(N) memset(mem_alloc(N), 0, (N)) #define mem_alloc(N) mem_alloc_func((N), NULL, __FILE__, __LINE__) #define mem_alloc2(N,S) mem_alloc_func((N), (S), __FILE__, __LINE__) @@ -320,7 +318,7 @@ mem_heap_dup( ulint len); /*!< in: length of data, in bytes */ /****************************************************************//** -A simple (s)printf replacement that dynamically allocates the space for the +A simple sprintf replacement that dynamically allocates the space for the formatted string from the given heap. This supports a very limited set of the printf syntax: types 's' and 'u' and length modifier 'l' (which is required for the 'u' type). diff --git a/storage/innobase/include/mem0mem.ic b/storage/innobase/include/mem0mem.ic index c70615e1ca9..eee3806dd52 100644 --- a/storage/innobase/include/mem0mem.ic +++ b/storage/innobase/include/mem0mem.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2010, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -193,7 +193,7 @@ mem_heap_alloc( free = mem_block_get_free(block); - buf = (byte*)block + free; + buf = (byte*) block + free; mem_block_set_free(block, free + MEM_SPACE_NEEDED(n)); @@ -202,11 +202,11 @@ mem_heap_alloc( n + MEM_FIELD_HEADER_SIZE + MEM_FIELD_TRAILER_SIZE); /* In the debug version write debugging info to the field */ - mem_field_init((byte*)buf, n); + mem_field_init((byte*) buf, n); /* Advance buf to point at the storage which will be given to the caller */ - buf = (byte*)buf + MEM_FIELD_HEADER_SIZE; + buf = (byte*) buf + MEM_FIELD_HEADER_SIZE; #endif UNIV_MEM_ALLOC(buf, n); @@ -229,7 +229,7 @@ mem_heap_get_heap_top( block = UT_LIST_GET_LAST(heap->base); - buf = (byte*)block + mem_block_get_free(block); + buf = (byte*) block + mem_block_get_free(block); return(buf); } @@ -272,8 +272,8 @@ mem_heap_free_heap_top( block = UT_LIST_GET_LAST(heap->base); while (block != NULL) { - if (((byte*)block + mem_block_get_free(block) >= old_top) - && ((byte*)block <= old_top)) { + if (((byte*) block + mem_block_get_free(block) >= old_top) + && ((byte*) block <= old_top)) { /* Found the right block */ break; @@ -292,22 +292,22 @@ mem_heap_free_heap_top( ut_ad(block); /* Set the free field of block */ - mem_block_set_free(block, old_top - (byte*)block); + mem_block_set_free(block, old_top - (byte*) block); #ifdef UNIV_MEM_DEBUG ut_ad(mem_block_get_start(block) <= mem_block_get_free(block)); /* In the debug version erase block from top up */ - mem_erase_buf(old_top, (byte*)block + block->len - old_top); + mem_erase_buf(old_top, (byte*) block + block->len - old_top); /* 
Update allocated memory count */ mutex_enter(&mem_hash_mutex); mem_current_allocated_memory -= (total_size - size); mutex_exit(&mem_hash_mutex); #else /* UNIV_MEM_DEBUG */ - UNIV_MEM_ASSERT_W(old_top, (byte*)block + block->len - old_top); + UNIV_MEM_ASSERT_W(old_top, (byte*) block + block->len - old_top); #endif /* UNIV_MEM_DEBUG */ - UNIV_MEM_ALLOC(old_top, (byte*)block + block->len - old_top); + UNIV_MEM_ALLOC(old_top, (byte*) block + block->len - old_top); /* If free == start, we may free the block if it is not the first one */ @@ -326,7 +326,7 @@ mem_heap_empty( /*===========*/ mem_heap_t* heap) /*!< in: heap to empty */ { - mem_heap_free_heap_top(heap, (byte*)heap + mem_block_get_start(heap)); + mem_heap_free_heap_top(heap, (byte*) heap + mem_block_get_start(heap)); #ifndef UNIV_HOTBACKUP if (heap->free_block) { mem_heap_free_block_free(heap); @@ -394,7 +394,7 @@ mem_heap_free_top( ut_ad(mem_block_get_start(block) <= mem_block_get_free(block)); /* In the debug version check the consistency, and erase field */ - mem_field_erase((byte*)block + mem_block_get_free(block), n); + mem_field_erase((byte*) block + mem_block_get_free(block), n); #endif /* If free == start, we may free the block if it is not the first @@ -529,7 +529,7 @@ mem_alloc_func( first block and thus we can calculate the pointer to the heap from the pointer to the buffer when we free the memory buffer. */ - if (UNIV_LIKELY_NULL(size)) { + if (size) { /* Adjust the allocation to the actual size of the memory block. 
*/ ulint m = mem_block_get_len(heap) @@ -538,12 +538,13 @@ mem_alloc_func( m -= MEM_FIELD_HEADER_SIZE + MEM_FIELD_TRAILER_SIZE; #endif /* UNIV_MEM_DEBUG */ ut_ad(m >= n); - *size = n = m; + n = m; + *size = m; } buf = mem_heap_alloc(heap, n); - ut_a((byte*)heap == (byte*)buf - MEM_BLOCK_HEADER_SIZE + ut_a((byte*) heap == (byte*) buf - MEM_BLOCK_HEADER_SIZE - MEM_FIELD_HEADER_SIZE); return(buf); } @@ -562,7 +563,7 @@ mem_free_func( { mem_heap_t* heap; - heap = (mem_heap_t*)((byte*)ptr - MEM_BLOCK_HEADER_SIZE + heap = (mem_heap_t*)((byte*) ptr - MEM_BLOCK_HEADER_SIZE - MEM_FIELD_HEADER_SIZE); mem_heap_free_func(heap, file_name, line); } diff --git a/storage/innobase/include/mem0pool.h b/storage/innobase/include/mem0pool.h index fa8be296ec9..451055e857f 100644 --- a/storage/innobase/include/mem0pool.h +++ b/storage/innobase/include/mem0pool.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/mem0pool.ic b/storage/innobase/include/mem0pool.ic index b891dd6dea0..f4bafb8ba63 100644 --- a/storage/innobase/include/mem0pool.ic +++ b/storage/innobase/include/mem0pool.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/mtr0log.h b/storage/innobase/include/mtr0log.h index d271002a5fe..1427a981bef 100644 --- a/storage/innobase/include/mtr0log.h +++ b/storage/innobase/include/mtr0log.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. 
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -168,7 +168,7 @@ mlog_write_initial_log_record_fast( mtr_t* mtr); /*!< in: mtr */ #else /* !UNIV_HOTBACKUP */ # define mlog_write_initial_log_record(ptr,type,mtr) ((void) 0) -# define mlog_write_initial_log_record_fast(ptr,type,log_ptr,mtr) ((byte *) 0) +# define mlog_write_initial_log_record_fast(ptr,type,log_ptr,mtr) ((byte*) 0) #endif /* !UNIV_HOTBACKUP */ /********************************************************//** Parses an initial log record written by mlog_write_initial_log_record. diff --git a/storage/innobase/include/mtr0log.ic b/storage/innobase/include/mtr0log.ic index 6f871170099..3ed4876eeab 100644 --- a/storage/innobase/include/mtr0log.ic +++ b/storage/innobase/include/mtr0log.ic @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -26,6 +26,7 @@ Created 12/7/1995 Heikki Tuuri #include "mach0data.h" #include "ut0lst.h" #include "buf0buf.h" +#include "buf0dblwr.h" #include "fsp0types.h" #include "trx0sys.h" @@ -203,7 +204,7 @@ mlog_write_initial_log_record_fast( system tablespace */ if (space == TRX_SYS_SPACE && offset >= FSP_EXTENT_SIZE && offset < 3 * FSP_EXTENT_SIZE) { - if (trx_doublewrite_buf_is_being_created) { + if (buf_dblwr_being_created) { /* Do nothing: we only come to this branch in an InnoDB database creation. We do not redo log anything for the doublewrite buffer pages. */ diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h index 46f1ff9310c..fd84f1119cc 100644 --- a/storage/innobase/include/mtr0mtr.h +++ b/storage/innobase/include/mtr0mtr.h @@ -191,6 +191,9 @@ functions). The page number parameter was originally written as 0. @{ */ MLOG_FILE_CREATE, MLOG_FILE_CREATE2 */ /* @} */ +/* included here because it needs MLOG_LSN defined */ +#include "log0log.h" + /***************************************************************//** Starts a mini-transaction. */ UNIV_INLINE @@ -355,7 +358,6 @@ mtr_memo_push( void* object, /*!< in: object */ ulint type); /*!< in: object type: MTR_MEMO_S_LOCK, ... */ - /* Type definition of a mini-transaction memo stack slot. */ typedef struct mtr_memo_slot_struct mtr_memo_slot_t; struct mtr_memo_slot_struct{ @@ -370,11 +372,14 @@ struct mtr_struct{ #endif dyn_array_t memo; /*!< memo stack for locks etc. 
*/ dyn_array_t log; /*!< mini-transaction log */ - ibool inside_ibuf; + unsigned inside_ibuf:1; /*!< TRUE if inside ibuf changes */ - ibool modifications; - /* TRUE if the mtr made modifications to - buffer pool pages */ + unsigned modifications:1; + /*!< TRUE if the mini-transaction + modified buffer pool pages */ + unsigned made_dirty:1; + /*!< TRUE if mtr has made at least + one buffer pool page dirty */ ulint n_log_recs; /* count of how many page initial log records have been written to the mtr log */ @@ -383,9 +388,9 @@ struct mtr_struct{ this mini-transaction */ ulint log_mode; /* specifies which operations should be logged; default value MTR_LOG_ALL */ - ib_uint64_t start_lsn;/* start lsn of the possible log entry for + lsn_t start_lsn;/* start lsn of the possible log entry for this mtr */ - ib_uint64_t end_lsn;/* end lsn of the possible log entry for + lsn_t end_lsn;/* end lsn of the possible log entry for this mtr */ #ifdef UNIV_DEBUG ulint magic_n; diff --git a/storage/innobase/include/mtr0mtr.ic b/storage/innobase/include/mtr0mtr.ic index a03a0271535..dcd9826b380 100644 --- a/storage/innobase/include/mtr0mtr.ic +++ b/storage/innobase/include/mtr0mtr.ic @@ -29,6 +29,16 @@ Created 11/26/1995 Heikki Tuuri #endif /* !UNIV_HOTBACKUP */ #include "mach0data.h" +/***************************************************//** +Checks if a mini-transaction is dirtying a clean page. +@return TRUE if the mtr is dirtying a clean page. */ +UNIV_INTERN +ibool +mtr_block_dirtied( +/*==============*/ + const buf_block_t* block) /*!< in: block being x-fixed */ + __attribute__((nonnull,warn_unused_result)); + /***************************************************************//** Starts a mini-transaction. 
*/ UNIV_INLINE @@ -43,8 +53,9 @@ mtr_start( dyn_array_create(&(mtr->log)); mtr->log_mode = MTR_LOG_ALL; - mtr->modifications = FALSE; mtr->inside_ibuf = FALSE; + mtr->modifications = FALSE; + mtr->made_dirty = FALSE; mtr->n_log_recs = 0; mtr->n_freed_pages = 0; @@ -72,6 +83,15 @@ mtr_memo_push( ut_ad(mtr->magic_n == MTR_MAGIC_N); ut_ad(mtr->state == MTR_ACTIVE); + /* If this mtr has x-fixed a clean page then we set + the made_dirty flag. This tells us if we need to + grab log_flush_order_mutex at mtr_commit so that we + can insert the dirtied page to the flush list. */ + if (type == MTR_MEMO_PAGE_X_FIX && !mtr->made_dirty) { + mtr->made_dirty = + mtr_block_dirtied((const buf_block_t*) object); + } + memo = &(mtr->memo); slot = (mtr_memo_slot_t*) dyn_array_push(memo, sizeof *slot); @@ -249,7 +269,7 @@ mtr_s_lock_func( ut_ad(mtr); ut_ad(lock); - rw_lock_s_lock_inline(lock, 0, file, line); + rw_lock_s_lock_func(lock, 0, file, line); mtr_memo_push(mtr, lock, MTR_MEMO_S_LOCK); } @@ -268,7 +288,7 @@ mtr_x_lock_func( ut_ad(mtr); ut_ad(lock); - rw_lock_x_lock_inline(lock, 0, file, line); + rw_lock_x_lock_func(lock, 0, file, line); mtr_memo_push(mtr, lock, MTR_MEMO_X_LOCK); } diff --git a/storage/innobase/include/mtr0types.h b/storage/innobase/include/mtr0types.h index 83a7aaf3839..7a2bcefadb9 100644 --- a/storage/innobase/include/mtr0types.h +++ b/storage/innobase/include/mtr0types.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h index fb13120a481..8f84193cb0f 100644 --- a/storage/innobase/include/os0file.h +++ b/storage/innobase/include/os0file.h @@ -1,6 +1,6 @@ /*********************************************************************** -Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Percona Inc. Portions of this file contain modifications contributed and copyrighted @@ -19,9 +19,9 @@ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
-You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA ***********************************************************************/ @@ -46,9 +46,6 @@ Created 10/21/1995 Heikki Tuuri /** File node of a tablespace or the log data space */ typedef struct fil_node_struct fil_node_t; -#ifdef UNIV_DO_FLUSH -extern ibool os_do_not_call_flush_at_each_write; -#endif /* UNIV_DO_FLUSH */ extern ibool os_has_said_disk_full; /** Flag: enable debug printout for asynchronous i/o */ extern ibool os_aio_print_debug; @@ -74,6 +71,8 @@ extern ulint os_n_pending_writes; #endif +/** File offset in bytes */ +typedef ib_uint64_t os_offset_t; #ifdef __WIN__ /** File handle */ # define os_file_t HANDLE @@ -102,14 +101,28 @@ log. */ #define OS_FILE_LOG_BLOCK_SIZE 512 -/** Options for file_create @{ */ -#define OS_FILE_OPEN 51 -#define OS_FILE_CREATE 52 -#define OS_FILE_OVERWRITE 53 -#define OS_FILE_OPEN_RAW 54 -#define OS_FILE_CREATE_PATH 55 -#define OS_FILE_OPEN_RETRY 56 /* for os_file_create() on - the first ibdata file */ +/** Options for os_file_create_func @{ */ +typedef enum os_file_create_enum { + OS_FILE_OPEN = 51, /*!< to open an existing file (if + doesn't exist, error) */ + OS_FILE_CREATE, /*!< to create new file (if + exists, error) */ + OS_FILE_OVERWRITE, /*!< to create a new file, if exists + the overwrite old file */ + OS_FILE_OPEN_RAW, /*!< to open a raw device or disk + partition */ + OS_FILE_CREATE_PATH, /*!< to create the directories */ + OS_FILE_OPEN_RETRY, /*!< open with retry */ + + /** Flags that can be combined with the above values. Please ensure + that the above values stay below 128. 
*/ + + OS_FILE_ON_ERROR_NO_EXIT = 128, /*!< do not exit on unknown errors */ + OS_FILE_ON_ERROR_SILENT = 256 /*!< don't print diagnostic messages to + the log unless it is a fatal error, + this flag is only used if + ON_ERROR_NO_EXIT is set */ +} os_file_create_t; #define OS_FILE_READ_ONLY 333 #define OS_FILE_READ_WRITE 444 @@ -204,20 +217,18 @@ used to register actual file read, write and flush */ # define register_pfs_file_open_begin(state, locker, key, op, name, \ src_file, src_line) \ do { \ - if (PSI_server) { \ - locker = PSI_server->get_thread_file_name_locker( \ - state, key, op, name, &locker); \ - if (locker) { \ - PSI_server->start_file_open_wait( \ - locker, src_file, src_line); \ - } \ + locker = PSI_CALL(get_thread_file_name_locker)( \ + state, key, op, name, &locker); \ + if (UNIV_LIKELY(locker != NULL)) { \ + PSI_CALL(start_file_open_wait)( \ + locker, src_file, src_line); \ } \ } while (0) # define register_pfs_file_open_end(locker, file) \ do { \ - if (locker) { \ - PSI_server->end_file_open_wait_and_bind_to_descriptor( \ + if (UNIV_LIKELY(locker != NULL)) { \ + PSI_CALL(end_file_open_wait_and_bind_to_descriptor)( \ locker, file); \ } \ } while (0) @@ -225,20 +236,18 @@ do { \ # define register_pfs_file_io_begin(state, locker, file, count, op, \ src_file, src_line) \ do { \ - if (PSI_server) { \ - locker = PSI_server->get_thread_file_descriptor_locker( \ - state, file, op); \ - if (locker) { \ - PSI_server->start_file_wait( \ - locker, count, src_file, src_line); \ - } \ + locker = PSI_CALL(get_thread_file_descriptor_locker)( \ + state, file, op); \ + if (UNIV_LIKELY(locker != NULL)) { \ + PSI_CALL(start_file_wait)( \ + locker, count, src_file, src_line); \ } \ } while (0) # define register_pfs_file_io_end(locker, count) \ do { \ - if (locker) { \ - PSI_server->end_file_wait(locker, count); \ + if (UNIV_LIKELY(locker != NULL)) { \ + PSI_CALL(end_file_wait)(locker, count); \ } \ } while (0) #endif /* UNIV_PFS_IO */ @@ -276,24 +285,20 @@ The wrapper 
functions have the prefix of "innodb_". */ # define os_file_close(file) \ pfs_os_file_close_func(file, __FILE__, __LINE__) -# define os_aio(type, mode, name, file, buf, offset, offset_high, \ +# define os_aio(type, mode, name, file, buf, offset, \ n, message1, message2) \ pfs_os_aio_func(type, mode, name, file, buf, offset, \ - offset_high, n, message1, message2, \ - __FILE__, __LINE__) + n, message1, message2, __FILE__, __LINE__) -# define os_file_read(file, buf, offset, offset_high, n) \ - pfs_os_file_read_func(file, buf, offset, offset_high, n, \ - __FILE__, __LINE__) +# define os_file_read(file, buf, offset, n) \ + pfs_os_file_read_func(file, buf, offset, n, __FILE__, __LINE__) -# define os_file_read_no_error_handling(file, buf, offset, \ - offset_high, n) \ - pfs_os_file_read_no_error_handling_func(file, buf, offset, \ - offset_high, n, \ +# define os_file_read_no_error_handling(file, buf, offset, n) \ + pfs_os_file_read_no_error_handling_func(file, buf, offset, n, \ __FILE__, __LINE__) -# define os_file_write(name, file, buf, offset, offset_high, n) \ - pfs_os_file_write_func(name, file, buf, offset, offset_high, \ +# define os_file_write(name, file, buf, offset, n) \ + pfs_os_file_write_func(name, file, buf, offset, \ n, __FILE__, __LINE__) # define os_file_flush(file) \ @@ -308,7 +313,7 @@ to original un-instrumented file I/O APIs */ # define os_file_create(key, name, create, purpose, type, success) \ os_file_create_func(name, create, purpose, type, success) -# define os_file_create_simple(key, name, create, access, success) \ +# define os_file_create_simple(key, name, create_mode, access, success) \ os_file_create_simple_func(name, create_mode, access, success) # define os_file_create_simple_no_error_handling( \ @@ -318,20 +323,18 @@ to original un-instrumented file I/O APIs */ # define os_file_close(file) os_file_close_func(file) -# define os_aio(type, mode, name, file, buf, offset, offset_high, \ - n, message1, message2) \ - os_aio_func(type, mode, name, 
file, buf, offset, offset_high, n,\ +# define os_aio(type, mode, name, file, buf, offset, n, message1, message2) \ + os_aio_func(type, mode, name, file, buf, offset, n, \ message1, message2) -# define os_file_read(file, buf, offset, offset_high, n) \ - os_file_read_func(file, buf, offset, offset_high, n) +# define os_file_read(file, buf, offset, n) \ + os_file_read_func(file, buf, offset, n) -# define os_file_read_no_error_handling(file, buf, offset, \ - offset_high, n) \ - os_file_read_no_error_handling_func(file, buf, offset, offset_high, n) +# define os_file_read_no_error_handling(file, buf, offset, n) \ + os_file_read_no_error_handling_func(file, buf, offset, n) -# define os_file_write(name, file, buf, offset, offset_high, n) \ - os_file_write_func(name, file, buf, offset, offset_high, n) +# define os_file_write(name, file, buf, offset, n) \ + os_file_write_func(name, file, buf, offset, n) # define os_file_flush(file) os_file_flush_func(file) @@ -461,13 +464,7 @@ os_file_create_simple_func( /*=======================*/ const char* name, /*!< in: name of the file or path as a null-terminated string */ - ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file is - opened (if does not exist, error), or - OS_FILE_CREATE if a new file is created - (if exists, error), or - OS_FILE_CREATE_PATH if new file - (if exists, error) and subdirectories along - its path are created (if needed)*/ + ulint create_mode,/*!< in: create mode */ ulint access_type,/*!< in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */ ibool* success);/*!< out: TRUE if succeed, FALSE if error */ @@ -483,15 +480,13 @@ os_file_create_simple_no_error_handling_func( /*=========================================*/ const char* name, /*!< in: name of the file or path as a null-terminated string */ - ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file - is opened (if does not exist, error), or - OS_FILE_CREATE if a new file is created - (if exists, error) */ + ulint create_mode,/*!< in: create mode */ 
ulint access_type,/*!< in: OS_FILE_READ_ONLY, OS_FILE_READ_WRITE, or OS_FILE_READ_ALLOW_DELETE; the last option is used by a backup program reading the file */ - ibool* success);/*!< out: TRUE if succeed, FALSE if error */ + ibool* success)/*!< out: TRUE if succeed, FALSE if error */ + __attribute__((nonnull, warn_unused_result)); /****************************************************************//** Tries to disable OS caching on an opened file descriptor. */ UNIV_INTERN @@ -515,14 +510,7 @@ os_file_create_func( /*================*/ const char* name, /*!< in: name of the file or path as a null-terminated string */ - ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file - is opened (if does not exist, error), or - OS_FILE_CREATE if a new file is created - (if exists, error), - OS_FILE_OVERWRITE if a new file is created - or an old overwritten; - OS_FILE_OPEN_RAW, if a raw device or disk - partition should be opened */ + ulint create_mode,/*!< in: create mode */ ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous, non-buffered i/o is desired, OS_FILE_NORMAL, if any normal file; @@ -531,7 +519,8 @@ os_file_create_func( async i/o or unbuffered i/o: look in the function source code for the exact rules */ ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */ - ibool* success);/*!< out: TRUE if succeed, FALSE if error */ + ibool* success)/*!< out: TRUE if succeed, FALSE if error */ + __attribute__((nonnull, warn_unused_result)); /***********************************************************************//** Deletes a file. The file has to be closed before calling this. @return TRUE if success */ @@ -539,7 +528,8 @@ UNIV_INTERN ibool os_file_delete( /*===========*/ - const char* name); /*!< in: file path as a null-terminated string */ + const char* name); /*!< in: file path as a null-terminated + string */ /***********************************************************************//** Deletes a file if it exists. The file has to be closed before calling this. 
@@ -548,7 +538,8 @@ UNIV_INTERN ibool os_file_delete_if_exists( /*=====================*/ - const char* name); /*!< in: file path as a null-terminated string */ + const char* name); /*!< in: file path as a null-terminated + string */ /***********************************************************************//** NOTE! Use the corresponding macro os_file_rename(), not directly this function! @@ -589,18 +580,13 @@ pfs_os_file_create_simple_func( mysql_pfs_key_t key, /*!< in: Performance Schema Key */ const char* name, /*!< in: name of the file or path as a null-terminated string */ - ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file is - opened (if does not exist, error), or - OS_FILE_CREATE if a new file is created - (if exists, error), or - OS_FILE_CREATE_PATH if new file - (if exists, error) and subdirectories along - its path are created (if needed)*/ + ulint create_mode,/*!< in: create mode */ ulint access_type,/*!< in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */ ibool* success,/*!< out: TRUE if succeed, FALSE if error */ const char* src_file,/*!< in: file name where func invoked */ - ulint src_line);/*!< in: line where the func invoked */ + ulint src_line)/*!< in: line where the func invoked */ + __attribute__((nonnull, warn_unused_result)); /****************************************************************//** NOTE! 
Please use the corresponding macro @@ -617,17 +603,15 @@ pfs_os_file_create_simple_no_error_handling_func( mysql_pfs_key_t key, /*!< in: Performance Schema Key */ const char* name, /*!< in: name of the file or path as a null-terminated string */ - ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file - is opened (if does not exist, error), or - OS_FILE_CREATE if a new file is created - (if exists, error) */ + ulint create_mode, /*!< in: file create mode */ ulint access_type,/*!< in: OS_FILE_READ_ONLY, OS_FILE_READ_WRITE, or OS_FILE_READ_ALLOW_DELETE; the last option is used by a backup program reading the file */ ibool* success,/*!< out: TRUE if succeed, FALSE if error */ const char* src_file,/*!< in: file name where func invoked */ - ulint src_line);/*!< in: line where the func invoked */ + ulint src_line)/*!< in: line where the func invoked */ + __attribute__((nonnull, warn_unused_result)); /****************************************************************//** NOTE! Please use the corresponding macro os_file_create(), not directly @@ -643,14 +627,7 @@ pfs_os_file_create_func( mysql_pfs_key_t key, /*!< in: Performance Schema Key */ const char* name, /*!< in: name of the file or path as a null-terminated string */ - ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file - is opened (if does not exist, error), or - OS_FILE_CREATE if a new file is created - (if exists, error), - OS_FILE_OVERWRITE if a new file is created - or an old overwritten; - OS_FILE_OPEN_RAW, if a raw device or disk - partition should be opened */ + ulint create_mode,/*!< in: file create mode */ ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous, non-buffered i/o is desired, OS_FILE_NORMAL, if any normal file; @@ -661,7 +638,8 @@ pfs_os_file_create_func( ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */ ibool* success,/*!< out: TRUE if succeed, FALSE if error */ const char* src_file,/*!< in: file name where func invoked */ - ulint src_line);/*!< in: line where the func invoked */ + 
ulint src_line)/*!< in: line where the func invoked */ + __attribute__((nonnull, warn_unused_result)); /***********************************************************************//** NOTE! Please use the corresponding macro os_file_close(), not directly @@ -687,10 +665,7 @@ pfs_os_file_read_func( /*==================*/ os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ - ulint offset, /*!< in: least significant 32 bits of file - offset where to read */ - ulint offset_high,/*!< in: most significant 32 bits of - offset */ + os_offset_t offset, /*!< in: file offset where to read */ ulint n, /*!< in: number of bytes to read */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line);/*!< in: line where the func invoked */ @@ -708,10 +683,7 @@ pfs_os_file_read_no_error_handling_func( /*====================================*/ os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ - ulint offset, /*!< in: least significant 32 bits of file - offset where to read */ - ulint offset_high,/*!< in: most significant 32 bits of - offset */ + os_offset_t offset, /*!< in: file offset where to read */ ulint n, /*!< in: number of bytes to read */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line);/*!< in: line where the func invoked */ @@ -733,10 +705,7 @@ pfs_os_aio_func( os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read or from which to write */ - ulint offset, /*!< in: least significant 32 bits of file - offset where to read or write */ - ulint offset_high,/*!< in: most significant 32 bits of - offset */ + os_offset_t offset, /*!< in: file offset where to read or write */ ulint n, /*!< in: number of bytes to read or write */ fil_node_t* message1,/*!< in: message for the aio handler (can be used to identify a completed @@ -762,10 +731,7 @@ pfs_os_file_write_func( null-terminated string */ os_file_t file, /*!< in: handle to a file */ 
const void* buf, /*!< in: buffer from which to write */ - ulint offset, /*!< in: least significant 32 bits of file - offset where to write */ - ulint offset_high,/*!< in: most significant 32 bits of - offset */ + os_offset_t offset, /*!< in: file offset where to write */ ulint n, /*!< in: number of bytes to write */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line);/*!< in: line where the func invoked */ @@ -814,23 +780,13 @@ os_file_close_no_error_handling( #endif /* UNIV_HOTBACKUP */ /***********************************************************************//** Gets a file size. -@return TRUE if success */ +@return file size, or (os_offset_t) -1 on failure */ UNIV_INTERN -ibool +os_offset_t os_file_get_size( /*=============*/ - os_file_t file, /*!< in: handle to a file */ - ulint* size, /*!< out: least significant 32 bits of file - size */ - ulint* size_high);/*!< out: most significant 32 bits of size */ -/***********************************************************************//** -Gets file size as a 64-bit integer ib_int64_t. -@return size in bytes, -1 if error */ -UNIV_INTERN -ib_int64_t -os_file_get_size_as_iblonglong( -/*===========================*/ - os_file_t file); /*!< in: handle to a file */ + os_file_t file) /*!< in: handle to a file */ + __attribute__((warn_unused_result)); /***********************************************************************//** Write the specified number of zeros to a newly created file. 
@return TRUE if success */ @@ -841,9 +797,8 @@ os_file_set_size( const char* name, /*!< in: name of the file or path as a null-terminated string */ os_file_t file, /*!< in: handle to a file */ - ulint size, /*!< in: least significant 32 bits of file - size */ - ulint size_high);/*!< in: most significant 32 bits of size */ + os_offset_t size) /*!< in: file size */ + __attribute__((nonnull, warn_unused_result)); /***********************************************************************//** Truncates a file at its current position. @return TRUE if success */ @@ -883,10 +838,7 @@ os_file_read_func( /*==============*/ os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ - ulint offset, /*!< in: least significant 32 bits of file - offset where to read */ - ulint offset_high,/*!< in: most significant 32 bits of - offset */ + os_offset_t offset, /*!< in: file offset where to read */ ulint n); /*!< in: number of bytes to read */ /*******************************************************************//** Rewind file to its start, read at most size - 1 bytes from it to str, and @@ -911,10 +863,7 @@ os_file_read_no_error_handling_func( /*================================*/ os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ - ulint offset, /*!< in: least significant 32 bits of file - offset where to read */ - ulint offset_high,/*!< in: most significant 32 bits of - offset */ + os_offset_t offset, /*!< in: file offset where to read */ ulint n); /*!< in: number of bytes to read */ /*******************************************************************//** @@ -930,10 +879,7 @@ os_file_write_func( null-terminated string */ os_file_t file, /*!< in: handle to a file */ const void* buf, /*!< in: buffer from which to write */ - ulint offset, /*!< in: least significant 32 bits of file - offset where to write */ - ulint offset_high,/*!< in: most significant 32 bits of - offset */ + os_offset_t offset, /*!< in: file offset 
where to write */ ulint n); /*!< in: number of bytes to write */ /*******************************************************************//** Check the existence and type of the given file. @@ -1037,10 +983,7 @@ os_aio_func( os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read or from which to write */ - ulint offset, /*!< in: least significant 32 bits of file - offset where to read or write */ - ulint offset_high, /*!< in: most significant 32 bits of - offset */ + os_offset_t offset, /*!< in: file offset where to read or write */ ulint n, /*!< in: number of bytes to read or write */ fil_node_t* message1,/*!< in: message for the aio handler (can be used to identify a completed diff --git a/storage/innobase/include/os0file.ic b/storage/innobase/include/os0file.ic index 648070c6909..bdd7eb5f8f4 100644 --- a/storage/innobase/include/os0file.ic +++ b/storage/innobase/include/os0file.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2010, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2010, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -40,13 +40,7 @@ pfs_os_file_create_simple_func( mysql_pfs_key_t key, /*!< in: Performance Schema Key */ const char* name, /*!< in: name of the file or path as a null-terminated string */ - ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file is - opened (if does not exist, error), or - OS_FILE_CREATE if a new file is created - (if exists, error), or - OS_FILE_CREATE_PATH if new file - (if exists, error) and subdirectories along - its path are created (if needed)*/ + ulint create_mode,/*!< in: create mode */ ulint access_type,/*!< in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */ ibool* success,/*!< out: TRUE if succeed, FALSE if error */ @@ -88,10 +82,7 @@ pfs_os_file_create_simple_no_error_handling_func( mysql_pfs_key_t key, /*!< in: Performance Schema Key */ const char* name, /*!< in: name of the file or path as a null-terminated string */ - ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file - is opened (if does not exist, error), or - OS_FILE_CREATE if a new file is created - (if exists, error) */ + ulint create_mode, /*!< in: file create mode */ ulint access_type,/*!< in: OS_FILE_READ_ONLY, OS_FILE_READ_WRITE, or OS_FILE_READ_ALLOW_DELETE; the last option is @@ -133,14 +124,7 @@ pfs_os_file_create_func( mysql_pfs_key_t key, /*!< in: Performance Schema Key */ const char* name, /*!< in: name of the file or path as a null-terminated string */ - ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file - is opened (if does not exist, error), or - OS_FILE_CREATE if a new file is created - (if exists, error), - OS_FILE_OVERWRITE if a new file is 
created - or an old overwritten; - OS_FILE_OPEN_RAW, if a raw device or disk - partition should be opened */ + ulint create_mode,/*!< in: file create mode */ ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous, non-buffered i/o is desired, OS_FILE_NORMAL, if any normal file; @@ -216,10 +200,7 @@ pfs_os_aio_func( os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read or from which to write */ - ulint offset, /*!< in: least significant 32 bits of file - offset where to read or write */ - ulint offset_high,/*!< in: most significant 32 bits of - offset */ + os_offset_t offset, /*!< in: file offset where to read or write */ ulint n, /*!< in: number of bytes to read or write */ fil_node_t* message1,/*!< in: message for the aio handler (can be used to identify a completed @@ -243,7 +224,7 @@ pfs_os_aio_func( : PSI_FILE_READ, src_file, src_line); - result = os_aio_func(type, mode, name, file, buf, offset, offset_high, + result = os_aio_func(type, mode, name, file, buf, offset, n, message1, message2); register_pfs_file_io_end(locker, n); @@ -263,10 +244,7 @@ pfs_os_file_read_func( /*==================*/ os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ - ulint offset, /*!< in: least significant 32 bits of file - offset where to read */ - ulint offset_high,/*!< in: most significant 32 bits of - offset */ + os_offset_t offset, /*!< in: file offset where to read */ ulint n, /*!< in: number of bytes to read */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ @@ -278,7 +256,7 @@ pfs_os_file_read_func( register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ, src_file, src_line); - result = os_file_read_func(file, buf, offset, offset_high, n); + result = os_file_read_func(file, buf, offset, n); register_pfs_file_io_end(locker, n); @@ -299,10 +277,7 @@ pfs_os_file_read_no_error_handling_func( /*====================================*/ 
os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ - ulint offset, /*!< in: least significant 32 bits of file - offset where to read */ - ulint offset_high,/*!< in: most significant 32 bits of - offset */ + os_offset_t offset, /*!< in: file offset where to read */ ulint n, /*!< in: number of bytes to read */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ @@ -314,8 +289,7 @@ pfs_os_file_read_no_error_handling_func( register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ, src_file, src_line); - result = os_file_read_no_error_handling_func(file, buf, offset, - offset_high, n); + result = os_file_read_no_error_handling_func(file, buf, offset, n); register_pfs_file_io_end(locker, n); @@ -336,10 +310,7 @@ pfs_os_file_write_func( null-terminated string */ os_file_t file, /*!< in: handle to a file */ const void* buf, /*!< in: buffer from which to write */ - ulint offset, /*!< in: least significant 32 bits of file - offset where to write */ - ulint offset_high,/*!< in: most significant 32 bits of - offset */ + os_offset_t offset, /*!< in: file offset where to write */ ulint n, /*!< in: number of bytes to write */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ @@ -351,7 +322,7 @@ pfs_os_file_write_func( register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_WRITE, src_file, src_line); - result = os_file_write_func(name, file, buf, offset, offset_high, n); + result = os_file_write_func(name, file, buf, offset, n); register_pfs_file_io_end(locker, n); diff --git a/storage/innobase/include/os0proc.h b/storage/innobase/include/os0proc.h index fd46bd7db87..613e3bd6947 100644 --- a/storage/innobase/include/os0proc.h +++ b/storage/innobase/include/os0proc.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. 
All Rights Reserved. +Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/os0proc.ic b/storage/innobase/include/os0proc.ic index c9641644525..506f4f8ce0c 100644 --- a/storage/innobase/include/os0proc.ic +++ b/storage/innobase/include/os0proc.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/os0sync.h b/storage/innobase/include/os0sync.h index 1b98f94f641..d68823b72ca 100644 --- a/storage/innobase/include/os0sync.h +++ b/storage/innobase/include/os0sync.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -18,8 +18,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -36,21 +36,33 @@ Created 9/6/1995 Heikki Tuuri #include "univ.i" #include "ut0lst.h" +#include "sync0types.h" #ifdef __WIN__ /** Native event (slow)*/ typedef HANDLE os_native_event_t; /** Native mutex */ -typedef CRITICAL_SECTION os_fast_mutex_t; +typedef CRITICAL_SECTION fast_mutex_t; /** Native condition variable. 
*/ typedef CONDITION_VARIABLE os_cond_t; #else /** Native mutex */ -typedef pthread_mutex_t os_fast_mutex_t; +typedef pthread_mutex_t fast_mutex_t; /** Native condition variable */ typedef pthread_cond_t os_cond_t; #endif +/** Structure that includes Performance Schema Probe pfs_psi +in the os_fast_mutex structure if UNIV_PFS_MUTEX is defined */ +typedef struct os_fast_mutex_struct { + fast_mutex_t mutex; /*!< os_fast_mutex */ +#ifdef UNIV_PFS_MUTEX + struct PSI_mutex* pfs_psi;/*!< The performance schema + instrumentation hook */ +#endif +} os_fast_mutex_t; + + /** Operating system event */ typedef struct os_event_struct os_event_struct_t; /** Operating system event handle */ @@ -87,6 +99,9 @@ typedef struct os_mutex_struct os_mutex_str_t; /** Operating system mutex handle */ typedef os_mutex_str_t* os_mutex_t; +/** Return value of os_event_wait_time() when the time is exceeded */ +#define OS_SYNC_TIME_EXCEEDED 1 + /** Mutex protecting counts and the event and OS 'slow' mutex lists */ extern os_mutex_t os_sync_mutex; @@ -181,7 +196,7 @@ os_event_wait_low( /**********************************************************//** Waits for an event object until it is in the signaled state or a timeout is exceeded. In Unix the timeout is always infinite. -@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */ +@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */ UNIV_INTERN ulint os_event_wait_time_low( @@ -231,34 +246,119 @@ ulint os_fast_mutex_trylock( /*==================*/ os_fast_mutex_t* fast_mutex); /*!< in: mutex to acquire */ + +/********************************************************************** +Following os_fast_ mutex APIs would be performance schema instrumented: + +os_fast_mutex_init +os_fast_mutex_lock +os_fast_mutex_unlock +os_fast_mutex_free + +These mutex APIs will point to corresponding wrapper functions that contain +the performance schema instrumentation. + +NOTE! 
The following macro should be used in mutex operation, not the +corresponding function. */ + +#ifdef UNIV_PFS_MUTEX +# define os_fast_mutex_init(K, M) \ + pfs_os_fast_mutex_init(K, M) + +# define os_fast_mutex_lock(M) \ + pfs_os_fast_mutex_lock(M, __FILE__, __LINE__) + +# define os_fast_mutex_unlock(M) pfs_os_fast_mutex_unlock(M) + +# define os_fast_mutex_free(M) pfs_os_fast_mutex_free(M) + +/*********************************************************//** +NOTE! Please use the corresponding macro os_fast_mutex_init(), not directly +this function! +A wrapper function for os_fast_mutex_init_func(). Initializes an operating +system fast mutex semaphore. */ +UNIV_INLINE +void +pfs_os_fast_mutex_init( +/*===================*/ + PSI_mutex_key key, /*!< in: Performance Schema + key */ + os_fast_mutex_t* fast_mutex); /*!< out: fast mutex */ +/**********************************************************//** +NOTE! Please use the corresponding macro os_fast_mutex_free(), not directly +this function! +Wrapper function for pfs_os_fast_mutex_free(). Also destroys the performance +schema probes when freeing the mutex */ +UNIV_INLINE +void +pfs_os_fast_mutex_free( +/*===================*/ + os_fast_mutex_t* fast_mutex); /*!< in/out: mutex to free */ +/**********************************************************//** +NOTE! Please use the corresponding macro os_fast_mutex_lock, not directly +this function! +Wrapper function of os_fast_mutex_lock. Acquires ownership of a fast mutex. */ +UNIV_INLINE +void +pfs_os_fast_mutex_lock( +/*===================*/ + os_fast_mutex_t* fast_mutex, /*!< in/out: mutex to acquire */ + const char* file_name, /*!< in: file name where + locked */ + ulint line); /*!< in: line where locked */ +/**********************************************************//** +NOTE! Please use the corresponding macro os_fast_mutex_unlock, not directly +this function! +Wrapper function of os_fast_mutex_unlock. Releases ownership of a fast mutex. 
*/ +UNIV_INLINE +void +pfs_os_fast_mutex_unlock( +/*=====================*/ + os_fast_mutex_t* fast_mutex); /*!< in/out: mutex to release */ + +#else /* UNIV_PFS_MUTEX */ + +# define os_fast_mutex_init(K, M) \ + os_fast_mutex_init_func(&((os_fast_mutex_t*)(M))->mutex) + +# define os_fast_mutex_lock(M) \ + os_fast_mutex_lock_func(&((os_fast_mutex_t*)(M))->mutex) + +# define os_fast_mutex_unlock(M) \ + os_fast_mutex_unlock_func(&((os_fast_mutex_t*)(M))->mutex) + +# define os_fast_mutex_free(M) \ + os_fast_mutex_free_func(&((os_fast_mutex_t*)(M))->mutex) +#endif /* UNIV_PFS_MUTEX */ + /**********************************************************//** Releases ownership of a fast mutex. */ UNIV_INTERN void -os_fast_mutex_unlock( -/*=================*/ - os_fast_mutex_t* fast_mutex); /*!< in: mutex to release */ +os_fast_mutex_unlock_func( +/*======================*/ + fast_mutex_t* fast_mutex); /*!< in: mutex to release */ /*********************************************************//** Initializes an operating system fast mutex semaphore. */ UNIV_INTERN void -os_fast_mutex_init( -/*===============*/ - os_fast_mutex_t* fast_mutex); /*!< in: fast mutex */ +os_fast_mutex_init_func( +/*====================*/ + fast_mutex_t* fast_mutex); /*!< in: fast mutex */ /**********************************************************//** Acquires ownership of a fast mutex. */ UNIV_INTERN void -os_fast_mutex_lock( -/*===============*/ - os_fast_mutex_t* fast_mutex); /*!< in: mutex to acquire */ +os_fast_mutex_lock_func( +/*====================*/ + fast_mutex_t* fast_mutex); /*!< in: mutex to acquire */ /**********************************************************//** Frees an mutex object. 
*/ UNIV_INTERN void -os_fast_mutex_free( -/*===============*/ - os_fast_mutex_t* fast_mutex); /*!< in: mutex to free */ +os_fast_mutex_free_func( +/*====================*/ + fast_mutex_t* fast_mutex); /*!< in: mutex to free */ /**********************************************************//** Atomic compare-and-swap and increment for InnoDB. */ @@ -304,12 +404,30 @@ amount of increment. */ # define os_atomic_increment_ulint(ptr, amount) \ os_atomic_increment(ptr, amount) +# define os_atomic_increment_uint64(ptr, amount) \ + os_atomic_increment(ptr, amount) + +/* Returns the resulting value, ptr is pointer to target, amount is the +amount to decrement. */ + +# define os_atomic_decrement(ptr, amount) \ + __sync_sub_and_fetch(ptr, amount) + +# define os_atomic_decrement_lint(ptr, amount) \ + os_atomic_decrement(ptr, amount) + +# define os_atomic_decrement_ulint(ptr, amount) \ + os_atomic_decrement(ptr, amount) + /**********************************************************//** Returns the old value of *ptr, atomically sets *ptr to new_val */ # define os_atomic_test_and_set_byte(ptr, new_val) \ __sync_lock_test_and_set(ptr, (byte) new_val) +# define os_atomic_test_and_set_ulint(ptr, new_val) \ + __sync_lock_test_and_set(ptr, new_val) + #elif defined(HAVE_IB_SOLARIS_ATOMICS) #define HAVE_ATOMIC_BUILTINS @@ -327,15 +445,15 @@ compare to, new_val is the value to swap in. 
*/ (atomic_cas_ulong(ptr, old_val, new_val) == old_val) # define os_compare_and_swap_lint(ptr, old_val, new_val) \ - ((lint)atomic_cas_ulong((ulong_t*) ptr, old_val, new_val) == old_val) + ((lint) atomic_cas_ulong((ulong_t*) ptr, old_val, new_val) == old_val) # ifdef HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS # if SIZEOF_PTHREAD_T == 4 # define os_compare_and_swap_thread_id(ptr, old_val, new_val) \ - ((pthread_t)atomic_cas_32(ptr, old_val, new_val) == old_val) + ((pthread_t) atomic_cas_32(ptr, old_val, new_val) == old_val) # elif SIZEOF_PTHREAD_T == 8 # define os_compare_and_swap_thread_id(ptr, old_val, new_val) \ - ((pthread_t)atomic_cas_64(ptr, old_val, new_val) == old_val) + ((pthread_t) atomic_cas_64(ptr, old_val, new_val) == old_val) # else # error "SIZEOF_PTHREAD_T != 4 or 8" # endif /* SIZEOF_PTHREAD_T CHECK */ @@ -351,44 +469,97 @@ compare to, new_val is the value to swap in. */ Returns the resulting value, ptr is pointer to target, amount is the amount of increment. */ -# define os_atomic_increment_lint(ptr, amount) \ - atomic_add_long_nv((ulong_t*) ptr, amount) - # define os_atomic_increment_ulint(ptr, amount) \ atomic_add_long_nv(ptr, amount) +# define os_atomic_increment_lint(ptr, amount) \ + os_atomic_increment_ulint((ulong_t*) ptr, amount) + +# define os_atomic_increment_uint64(ptr, amount) \ + atomic_add_64_nv(ptr, amount) + +/* Returns the resulting value, ptr is pointer to target, amount is the +amount to decrement. 
*/ + +# define os_atomic_decrement_lint(ptr, amount) \ + os_atomic_increment_ulint((ulong_t*) ptr, -(amount)) + +# define os_atomic_decrement_ulint(ptr, amount) \ + os_atomic_increment_ulint(ptr, -(amount)) + /**********************************************************//** Returns the old value of *ptr, atomically sets *ptr to new_val */ # define os_atomic_test_and_set_byte(ptr, new_val) \ atomic_swap_uchar(ptr, new_val) +# define os_atomic_test_and_set_ulint(ptr, new_val) \ + atomic_swap_ulong(ptr, new_val) + #elif defined(HAVE_WINDOWS_ATOMICS) #define HAVE_ATOMIC_BUILTINS -/* On Windows, use Windows atomics / interlocked */ -# ifdef _WIN64 -# define win_cmp_and_xchg InterlockedCompareExchange64 -# define win_xchg_and_add InterlockedExchangeAdd64 -# else /* _WIN64 */ -# define win_cmp_and_xchg InterlockedCompareExchange -# define win_xchg_and_add InterlockedExchangeAdd -# endif +/**********************************************************//** +Atomic compare and exchange of signed integers (both 32 and 64 bit). +@return value found before the exchange. +If it is not equal to old_value the exchange did not happen. */ +UNIV_INLINE +lint +win_cmp_and_xchg_lint( +/*==================*/ + volatile lint* ptr, /*!< in/out: source/destination */ + lint new_val, /*!< in: exchange value */ + lint old_val); /*!< in: value to compare to */ + +/**********************************************************//** +Atomic addition of signed integers. +@return Initial value of the variable pointed to by ptr */ +UNIV_INLINE +lint +win_xchg_and_add( +/*=============*/ + volatile lint* ptr, /*!< in/out: address of destination */ + lint val); /*!< in: number to be added */ + +/**********************************************************//** +Atomic compare and exchange of unsigned integers. +@return value found before the exchange. +If it is not equal to old_value the exchange did not happen. 
*/ +UNIV_INLINE +ulint +win_cmp_and_xchg_ulint( +/*===================*/ + volatile ulint* ptr, /*!< in/out: source/destination */ + ulint new_val, /*!< in: exchange value */ + ulint old_val); /*!< in: value to compare to */ + +/**********************************************************//** +Atomic compare and exchange of 32 bit unsigned integers. +@return value found before the exchange. +If it is not equal to old_value the exchange did not happen. */ +UNIV_INLINE +DWORD +win_cmp_and_xchg_dword( +/*===================*/ + volatile DWORD* ptr, /*!< in/out: source/destination */ + DWORD new_val, /*!< in: exchange value */ + DWORD old_val); /*!< in: value to compare to */ /**********************************************************//** Returns true if swapped, ptr is pointer to target, old_val is value to compare to, new_val is the value to swap in. */ # define os_compare_and_swap_ulint(ptr, old_val, new_val) \ - (win_cmp_and_xchg(ptr, new_val, old_val) == old_val) + (win_cmp_and_xchg_ulint(ptr, new_val, old_val) == old_val) # define os_compare_and_swap_lint(ptr, old_val, new_val) \ - (win_cmp_and_xchg(ptr, new_val, old_val) == old_val) + (win_cmp_and_xchg_lint(ptr, new_val, old_val) == old_val) /* windows thread objects can always be passed to windows atomic functions */ # define os_compare_and_swap_thread_id(ptr, old_val, new_val) \ - (InterlockedCompareExchange(ptr, new_val, old_val) == old_val) + (win_cmp_and_xchg_dword(ptr, new_val, old_val) == old_val) + # define INNODB_RW_LOCKS_USE_ATOMICS # define IB_ATOMICS_STARTUP_MSG \ "Mutexes and rw_locks use Windows interlocked functions" @@ -401,7 +572,20 @@ amount of increment. 
*/ (win_xchg_and_add(ptr, amount) + amount) # define os_atomic_increment_ulint(ptr, amount) \ - ((ulint) (win_xchg_and_add(ptr, amount) + amount)) + ((ulint) (win_xchg_and_add((lint*) ptr, (lint) amount) + amount)) + +# define os_atomic_increment_uint64(ptr, amount) \ + ((ulint) (win_xchg_and_add(ptr, (lint) amount) + amount)) + +/**********************************************************//** +Returns the resulting value, ptr is pointer to target, amount is the +amount to decrement. There is no atomic substract function on Windows */ + +# define os_atomic_decrement_lint(ptr, amount) \ + (win_xchg_and_add(ptr, -(lint) amount) - amount) + +# define os_atomic_decrement_ulint(ptr, amount) \ + ((ulint) (win_xchg_and_add((lint*) ptr, -(lint) amount) - amount)) /**********************************************************//** Returns the old value of *ptr, atomically sets *ptr to new_val. @@ -411,10 +595,55 @@ clobbered */ # define os_atomic_test_and_set_byte(ptr, new_val) \ ((byte) InterlockedExchange(ptr, new_val)) +# define os_atomic_test_and_set_ulong(ptr, new_val) \ + InterlockedExchange(ptr, new_val) + #else # define IB_ATOMICS_STARTUP_MSG \ "Mutexes and rw_locks use InnoDB's own implementation" #endif +#ifdef HAVE_ATOMIC_BUILTINS +#define os_atomic_inc_ulint(m,v,d) os_atomic_increment_ulint(v, d) +#define os_atomic_dec_ulint(m,v,d) os_atomic_decrement_ulint(v, d) +#else +#define os_atomic_inc_ulint(m,v,d) os_atomic_inc_ulint_func(m, v, d) +#define os_atomic_dec_ulint(m,v,d) os_atomic_dec_ulint_func(m, v, d) +#endif /* HAVE_ATOMIC_BUILTINS */ + +/**********************************************************//** +Following macros are used to update specified counter atomically +if HAVE_ATOMIC_BUILTINS defined. 
Otherwise, use mutex passed in +for synchronization */ +#ifdef HAVE_ATOMIC_BUILTINS +#define os_increment_counter_by_amount(mutex, counter, amount) \ + (void) os_atomic_increment_ulint(&counter, amount) + +#define os_decrement_counter_by_amount(mutex, counter, amount) \ + (void) os_atomic_increment_ulint(&counter, (-((lint) amount))) +#else +#define os_increment_counter_by_amount(mutex, counter, amount) \ + do { \ + mutex_enter(&(mutex)); \ + (counter) += (amount); \ + mutex_exit(&(mutex)); \ + } while (0) + +#define os_decrement_counter_by_amount(mutex, counter, amount) \ + do { \ + ut_a(counter >= amount); \ + mutex_enter(&(mutex)); \ + (counter) -= (amount); \ + mutex_exit(&(mutex)); \ + } while (0) +#endif /* HAVE_ATOMIC_BUILTINS */ + +#define os_inc_counter(mutex, counter) \ + os_increment_counter_by_amount(mutex, counter, 1) + +#define os_dec_counter(mutex, counter) \ + do { \ + os_decrement_counter_by_amount(mutex, counter, 1);\ + } while (0); #ifndef UNIV_NONINL #include "os0sync.ic" diff --git a/storage/innobase/include/os0sync.ic b/storage/innobase/include/os0sync.ic index c33f13aaad6..0d907b31366 100644 --- a/storage/innobase/include/os0sync.ic +++ b/storage/innobase/include/os0sync.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -36,14 +36,10 @@ os_fast_mutex_trylock( /*==================*/ os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */ { -#ifdef __WIN__ - if (TryEnterCriticalSection(fast_mutex)) { - - return(0); - } else { + fast_mutex_t* mutex = &fast_mutex->mutex; - return(1); - } +#ifdef __WIN__ + return(!TryEnterCriticalSection(mutex)); #else /* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock so that it returns 0 on success. In the operating system @@ -51,6 +47,186 @@ os_fast_mutex_trylock( returns 1 on success (but MySQL remaps that to 0), while Linux, FreeBSD, Solaris, AIX, Tru64 Unix, HP-UX-11.0 return 0 on success. */ - return((ulint) pthread_mutex_trylock(fast_mutex)); + return((ulint) pthread_mutex_trylock(mutex)); +#endif +} + +#ifdef UNIV_PFS_MUTEX +/*********************************************************//** +NOTE! Please use the corresponding macro os_fast_mutex_init(), not directly +this function! +A wrapper function for os_fast_mutex_init_func(). Initializes an operating +system fast mutex semaphore. */ +UNIV_INLINE +void +pfs_os_fast_mutex_init( +/*===================*/ + PSI_mutex_key key, /*!< in: Performance Schema + key */ + os_fast_mutex_t* fast_mutex) /*!< out: fast mutex */ +{ +#ifdef HAVE_PSI_MUTEX_INTERFACE + fast_mutex->pfs_psi = PSI_CALL(init_mutex)(key, &fast_mutex->mutex); +#else + fast_mutex->pfs_psi = NULL; +#endif + + os_fast_mutex_init_func(&fast_mutex->mutex); +} +/******************************************************************//** +NOTE! 
Please use the corresponding macro os_fast_mutex_free(), not directly +this function! +Wrapper function for pfs_os_fast_mutex_free(). Also destroys the performance +schema probes when freeing the mutex */ +UNIV_INLINE +void +pfs_os_fast_mutex_free( +/*===================*/ + os_fast_mutex_t* fast_mutex) /*!< in/out: mutex */ +{ +#ifdef HAVE_PSI_MUTEX_INTERFACE + if (fast_mutex->pfs_psi != NULL) + PSI_CALL(destroy_mutex)(fast_mutex->pfs_psi); #endif + fast_mutex->pfs_psi = NULL; + + os_fast_mutex_free_func(&fast_mutex->mutex); } +/**********************************************************//** +NOTE! Please use the corresponding macro os_fast_mutex_lock, not directly +this function! +Wrapper function of os_fast_mutex_lock_func. Acquires ownership of a fast +mutex. */ +UNIV_INLINE +void +pfs_os_fast_mutex_lock( +/*===================*/ + os_fast_mutex_t* fast_mutex, /*!< in/out: mutex to acquire */ + const char* file_name, /*!< in: file name where + locked */ + ulint line) /*!< in: line where locked */ +{ +#ifdef HAVE_PSI_MUTEX_INTERFACE + if (fast_mutex->pfs_psi != NULL) + { + PSI_mutex_locker* locker; + PSI_mutex_locker_state state; + + locker = PSI_CALL(start_mutex_wait)(&state, fast_mutex->pfs_psi, + PSI_MUTEX_LOCK, file_name, line); + + os_fast_mutex_lock_func(&fast_mutex->mutex); + + if (locker != NULL) + PSI_CALL(end_mutex_wait)(locker, 0); + } + else +#endif + { + os_fast_mutex_lock_func(&fast_mutex->mutex); + } + + return; +} +/**********************************************************//** +NOTE! Please use the corresponding macro os_fast_mutex_unlock, not directly +this function! +Wrapper function of os_fast_mutex_unlock_func. Releases ownership of a +fast mutex. 
*/ +UNIV_INLINE +void +pfs_os_fast_mutex_unlock( +/*=====================*/ + os_fast_mutex_t* fast_mutex) /*!< in/out: mutex to release */ +{ +#ifdef HAVE_PSI_MUTEX_INTERFACE + if (fast_mutex->pfs_psi != NULL) + PSI_CALL(unlock_mutex)(fast_mutex->pfs_psi); +#endif + + os_fast_mutex_unlock_func(&fast_mutex->mutex); +} +#endif /* UNIV_PFS_MUTEX */ + +#ifdef HAVE_WINDOWS_ATOMICS + +/* Use inline functions to make 64 and 32 bit versions of windows atomic +functions so that typecasts are evaluated at compile time. Take advantage +that lint is either __int64 or long int and windows atomic functions work +on __int64 and LONG */ + +/**********************************************************//** +Atomic compare and exchange of unsigned integers. +@return value found before the exchange. +If it is not equal to old_value the exchange did not happen. */ +UNIV_INLINE +lint +win_cmp_and_xchg_lint( +/*==================*/ + volatile lint* ptr, /*!< in/out: source/destination */ + lint new_val, /*!< in: exchange value */ + lint old_val) /*!< in: value to compare to */ +{ +# ifdef _WIN64 + return(InterlockedCompareExchange64(ptr, new_val, old_val)); +# else + return(InterlockedCompareExchange(ptr, new_val, old_val)); +# endif +} + +/**********************************************************//** +Atomic addition of signed integers. +@return Initial value of the variable pointed to by ptr */ +UNIV_INLINE +lint +win_xchg_and_add( +/*=============*/ + volatile lint* ptr, /*!< in/out: address of destination */ + lint val) /*!< in: number to be added */ +{ +#ifdef _WIN64 + return(InterlockedExchangeAdd64(ptr, val)); +#else + return(InterlockedExchangeAdd(ptr, val)); +#endif +} + +/**********************************************************//** +Atomic compare and exchange of unsigned integers. +@return value found before the exchange. +If it is not equal to old_value the exchange did not happen. 
*/ +UNIV_INLINE +ulint +win_cmp_and_xchg_ulint( +/*===================*/ + volatile ulint* ptr, /*!< in/out: source/destination */ + ulint new_val, /*!< in: exchange value */ + ulint old_val) /*!< in: value to compare to */ +{ + return((ulint) win_cmp_and_xchg_lint( + (volatile lint*) ptr, + (lint) new_val, + (lint) old_val)); +} + +/**********************************************************//** +Atomic compare and exchange of 32-bit unsigned integers. +@return value found before the exchange. +If it is not equal to old_value the exchange did not happen. */ +UNIV_INLINE +DWORD +win_cmp_and_xchg_dword( +/*===================*/ + volatile DWORD* ptr, /*!< in/out: source/destination */ + DWORD new_val, /*!< in: exchange value */ + DWORD old_val) /*!< in: value to compare to */ +{ + ut_ad(sizeof(DWORD) == sizeof(LONG)); /* We assume this. */ + return(InterlockedCompareExchange( + (volatile LONG*) ptr, + (LONG) new_val, + (LONG) old_val)); +} + +#endif /* HAVE_WINDOWS_ATOMICS */ + diff --git a/storage/innobase/include/os0thread.h b/storage/innobase/include/os0thread.h index df3cdb7728e..37c54afae80 100644 --- a/storage/innobase/include/os0thread.h +++ b/storage/innobase/include/os0thread.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -35,7 +35,6 @@ can wait inside InnoDB */ #define OS_THREAD_MAX_N srv_max_n_threads - /* Possible fixed priorities for threads */ #define OS_THREAD_PRIORITY_NONE 100 #define OS_THREAD_PRIORITY_BACKGROUND 1 @@ -44,14 +43,35 @@ can wait inside InnoDB */ #ifdef __WIN__ typedef void* os_thread_t; -typedef unsigned long os_thread_id_t; /*!< In Windows the thread id +typedef DWORD os_thread_id_t; /*!< In Windows the thread id is an unsigned long int */ +extern "C" { +typedef LPTHREAD_START_ROUTINE os_thread_func_t; +} + +/** Macro for specifying a Windows thread start function. */ +#define DECLARE_THREAD(func) WINAPI func + +/** Required to get around a build error on Windows. Even though our functions +are defined/declared as WINAPI f(LPVOID a); the compiler complains that they +are defined as: os_thread_ret_t (__cdecl*)(void*). Because our functions +don't access the arguments and don't return any value, we should be safe. */ +#define os_thread_create(f,a,i) \ + os_thread_create_func(reinterpret_cast<os_thread_func_t>(f), a, i) + #else + typedef pthread_t os_thread_t; typedef os_thread_t os_thread_id_t; /*!< In Unix we use the thread handle itself as the id of the thread */ -#endif +extern "C" { typedef void* (*os_thread_func_t)(void*); } + +/** Macro for specifying a POSIX thread start function. 
*/ +#define DECLARE_THREAD(func) func +#define os_thread_create(f,a,i) os_thread_create_func(f, a, i) + +#endif /* __WIN__ */ /* Define a function pointer type to use in a typecast */ typedef void* (*os_posix_f_t) (void*); @@ -88,14 +108,10 @@ thread should always use that to exit and not use return() to exit. @return handle to the thread */ UNIV_INTERN os_thread_t -os_thread_create( -/*=============*/ -#ifndef __WIN__ - os_posix_f_t start_f, -#else - ulint (*start_f)(void*), /*!< in: pointer to function +os_thread_create_func( +/*==================*/ + os_thread_func_t func, /*!< in: pointer to function from which to start */ -#endif void* arg, /*!< in: argument to start function */ os_thread_id_t* thread_id); /*!< out: id of the created diff --git a/storage/innobase/include/os0thread.ic b/storage/innobase/include/os0thread.ic index f89bc40b4fa..0622d22f2dc 100644 --- a/storage/innobase/include/os0thread.ic +++ b/storage/innobase/include/os0thread.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/page0cur.h b/storage/innobase/include/page0cur.h index 1544b0abe1c..52f5c5de58a 100644 --- a/storage/innobase/include/page0cur.h +++ b/storage/innobase/include/page0cur.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/page0cur.ic b/storage/innobase/include/page0cur.ic index 3520677dfb3..a065f9ff30d 100644 --- a/storage/innobase/include/page0cur.ic +++ b/storage/innobase/include/page0cur.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. 
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -27,6 +27,8 @@ Created 10/4/1994 Heikki Tuuri #include "buf0types.h" #ifdef UNIV_DEBUG +# include "rem0cmp.h" + /*********************************************************//** Gets pointer to the page frame where the cursor is positioned. @return page */ @@ -268,6 +270,7 @@ page_cur_tuple_insert( index, rec, offsets, mtr); } + ut_ad(!rec || !cmp_dtuple_rec(tuple, rec, offsets)); mem_heap_free(heap); return(rec); } diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h index 74e9ceca959..e4571b69376 100644 --- a/storage/innobase/include/page0page.h +++ b/storage/innobase/include/page0page.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software diff --git a/storage/innobase/include/page0page.ic b/storage/innobase/include/page0page.ic index 781ad029e87..e73e547e92b 100644 --- a/storage/innobase/include/page0page.ic +++ b/storage/innobase/include/page0page.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -136,7 +136,7 @@ page_header_set_field( ut_ad(field != PAGE_N_HEAP || (val & 0x7fff) < UNIV_PAGE_SIZE); mach_write_to_2(page + PAGE_HEADER + field, val); - if (UNIV_LIKELY_NULL(page_zip)) { + if (page_zip) { page_zip_write_header(page_zip, page + PAGE_HEADER + field, 2, NULL); } @@ -211,7 +211,7 @@ page_header_reset_last_insert( { ut_ad(page && mtr); - if (UNIV_LIKELY_NULL(page_zip)) { + if (page_zip) { mach_write_to_2(page + (PAGE_HEADER + PAGE_LAST_INSERT), 0); page_zip_write_header(page_zip, page + (PAGE_HEADER + PAGE_LAST_INSERT), @@ -233,8 +233,7 @@ page_is_comp( /*=========*/ const page_t* page) /*!< in: index page */ { - return(UNIV_EXPECT(page_header_get_field(page, PAGE_N_HEAP) & 0x8000, - 0x8000)); + return(page_header_get_field(page, PAGE_N_HEAP) & 0x8000); } /************************************************************//** @@ -345,10 +344,10 @@ page_rec_is_user_rec_low( #endif ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START); - return(UNIV_LIKELY(offset != PAGE_NEW_SUPREMUM) - && UNIV_LIKELY(offset != PAGE_NEW_INFIMUM) - && UNIV_LIKELY(offset != PAGE_OLD_INFIMUM) - && UNIV_LIKELY(offset != PAGE_OLD_SUPREMUM)); + return(offset != PAGE_NEW_SUPREMUM + && offset != PAGE_NEW_INFIMUM + 
&& offset != PAGE_OLD_INFIMUM + && offset != PAGE_OLD_SUPREMUM); } /************************************************************//** @@ -363,8 +362,8 @@ page_rec_is_supremum_low( ut_ad(offset >= PAGE_NEW_INFIMUM); ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START); - return(UNIV_UNLIKELY(offset == PAGE_NEW_SUPREMUM) - || UNIV_UNLIKELY(offset == PAGE_OLD_SUPREMUM)); + return(offset == PAGE_NEW_SUPREMUM + || offset == PAGE_OLD_SUPREMUM); } /************************************************************//** @@ -379,8 +378,7 @@ page_rec_is_infimum_low( ut_ad(offset >= PAGE_NEW_INFIMUM); ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START); - return(UNIV_UNLIKELY(offset == PAGE_NEW_INFIMUM) - || UNIV_UNLIKELY(offset == PAGE_OLD_INFIMUM)); + return(offset == PAGE_NEW_INFIMUM || offset == PAGE_OLD_INFIMUM); } /************************************************************//** @@ -484,12 +482,14 @@ page_cmp_dtuple_rec_with_match( rec_offset = page_offset(rec); - if (UNIV_UNLIKELY(rec_offset == PAGE_NEW_INFIMUM) - || UNIV_UNLIKELY(rec_offset == PAGE_OLD_INFIMUM)) { + if (rec_offset == PAGE_NEW_INFIMUM + || rec_offset == PAGE_OLD_INFIMUM) { + return(1); - } - if (UNIV_UNLIKELY(rec_offset == PAGE_NEW_SUPREMUM) - || UNIV_UNLIKELY(rec_offset == PAGE_OLD_SUPREMUM)) { + + } else if (rec_offset == PAGE_NEW_SUPREMUM + || rec_offset == PAGE_OLD_SUPREMUM) { + return(-1); } @@ -731,21 +731,19 @@ page_rec_get_next_low( offs = rec_get_next_offs(rec, comp); - if (UNIV_UNLIKELY(offs >= UNIV_PAGE_SIZE)) { + if (offs >= UNIV_PAGE_SIZE) { fprintf(stderr, "InnoDB: Next record offset is nonsensical %lu" " in record at offset %lu\n" "InnoDB: rec address %p, space id %lu, page %lu\n", - (ulong)offs, (ulong) page_offset(rec), + (ulong) offs, (ulong) page_offset(rec), (void*) rec, (ulong) page_get_space_id(page), (ulong) page_get_page_no(page)); buf_page_print(page, 0, 0); ut_error; - } - - if (UNIV_UNLIKELY(offs == 0)) { + } else if (offs == 0) { return(NULL); } @@ -797,11 +795,7 @@ 
page_rec_set_next( ut_ad(!next || !page_rec_is_infimum(next)); ut_ad(!next || page_align(rec) == page_align(next)); - if (UNIV_LIKELY(next != NULL)) { - offs = page_offset(next); - } else { - offs = 0; - } + offs = next != NULL ? page_offset(next) : 0; if (page_rec_is_comp(rec)) { rec_set_next_offs_new(rec, offs); @@ -976,7 +970,7 @@ page_get_free_space_of_empty( /*=========================*/ ulint comp) /*!< in: nonzero=compact page layout */ { - if (UNIV_LIKELY(comp)) { + if (comp) { return((ulint)(UNIV_PAGE_SIZE - PAGE_NEW_SUPREMUM_END - PAGE_DIR @@ -1111,7 +1105,7 @@ page_mem_free( page_header_set_field(page, page_zip, PAGE_GARBAGE, garbage + rec_offs_size(offsets)); - if (UNIV_LIKELY_NULL(page_zip)) { + if (page_zip) { page_zip_dir_delete(page_zip, rec, index, offsets, free); } else { page_header_set_field(page, page_zip, PAGE_N_RECS, diff --git a/storage/innobase/include/page0types.h b/storage/innobase/include/page0types.h index d9a277bf208..da2ac1c7de2 100644 --- a/storage/innobase/include/page0types.h +++ b/storage/innobase/include/page0types.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -49,15 +49,14 @@ page0*.h includes rem0rec.h and may include rem0rec.ic. */ /** Number of bits needed for representing different compressed page sizes */ #define PAGE_ZIP_SSIZE_BITS 3 -/** log2 of smallest compressed page size */ -#define PAGE_ZIP_MIN_SIZE_SHIFT 10 -/** Smallest compressed page size */ -#define PAGE_ZIP_MIN_SIZE (1 << PAGE_ZIP_MIN_SIZE_SHIFT) +/** Maximum compressed page shift size */ +#define PAGE_ZIP_SSIZE_MAX \ + (UNIV_ZIP_SIZE_SHIFT_MAX - UNIV_ZIP_SIZE_SHIFT_MIN + 1) -/** Number of supported compressed page sizes */ -#define PAGE_ZIP_NUM_SSIZE (UNIV_PAGE_SIZE_SHIFT - PAGE_ZIP_MIN_SIZE_SHIFT + 2) -#if PAGE_ZIP_NUM_SSIZE > (1 << PAGE_ZIP_SSIZE_BITS) -# error "PAGE_ZIP_NUM_SSIZE > (1 << PAGE_ZIP_SSIZE_BITS)" +/* Make sure there are enough bits available to store the maximum zip +ssize, which is the number of shifts from 512. */ +#if PAGE_ZIP_SSIZE_MAX >= (1 << PAGE_ZIP_SSIZE_BITS) +# error "PAGE_ZIP_SSIZE_MAX >= (1 << PAGE_ZIP_SSIZE_BITS)" #endif /** Compressed page descriptor */ @@ -75,9 +74,9 @@ struct page_zip_des_struct columns on the page; the maximum is 744 on a 16 KiB page */ unsigned ssize:PAGE_ZIP_SSIZE_BITS; - /*!< 0 or compressed page size; + /*!< 0 or compressed page shift size; the size in bytes is - PAGE_ZIP_MIN_SIZE << (ssize - 1). */ + (UNIV_ZIP_SIZE_MIN >> 1) << ssize. 
*/ }; /** Compression statistics for a given page size */ @@ -98,7 +97,7 @@ struct page_zip_stat_struct { typedef struct page_zip_stat_struct page_zip_stat_t; /** Statistics on compression, indexed by page_zip_des_struct::ssize - 1 */ -extern page_zip_stat_t page_zip_stat[PAGE_ZIP_NUM_SSIZE - 1]; +extern page_zip_stat_t page_zip_stat[PAGE_ZIP_SSIZE_MAX]; /**********************************************************************//** Write the "deleted" flag of a record on a compressed page. The flag must diff --git a/storage/innobase/include/page0zip.h b/storage/innobase/include/page0zip.h index 00c1d0516e6..f127fad2260 100644 --- a/storage/innobase/include/page0zip.h +++ b/storage/innobase/include/page0zip.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2005, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -35,6 +35,7 @@ Created June 2005 by Marko Makela #include "page0types.h" #include "buf0types.h" #include "dict0types.h" +#include "srv0srv.h" #include "trx0types.h" #include "mem0mem.h" @@ -444,9 +445,21 @@ ulint page_zip_calc_checksum( /*===================*/ const void* data, /*!< in: compressed page */ - ulint size) /*!< in: size of compressed page */ + ulint size, /*!< in: size of compressed page */ + srv_checksum_algorithm_t algo) /*!< in: algorithm to use */ __attribute__((nonnull)); +/**********************************************************************//** +Verify a compressed page's checksum. +@return TRUE if the stored checksum is valid according to the value of +innodb_checksum_algorithm */ +UNIV_INTERN +ibool +page_zip_verify_checksum( +/*=====================*/ + const void* data, /*!< in: compressed page */ + ulint size); /*!< in: size of compressed page */ + #ifndef UNIV_HOTBACKUP /** Check if a pointer to an uncompressed page matches a compressed page. @param ptr pointer to an uncompressed page frame diff --git a/storage/innobase/include/page0zip.ic b/storage/innobase/include/page0zip.ic index 75cc7a9fcc4..c9300aa4e9f 100644 --- a/storage/innobase/include/page0zip.ic +++ b/storage/innobase/include/page0zip.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2005, 2011, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -120,13 +120,13 @@ page_zip_get_size( { ulint size; - if (UNIV_UNLIKELY(!page_zip->ssize)) { + if (!page_zip->ssize) { return(0); } - size = (PAGE_ZIP_MIN_SIZE >> 1) << page_zip->ssize; + size = (UNIV_ZIP_SIZE_MIN >> 1) << page_zip->ssize; - ut_ad(size >= PAGE_ZIP_MIN_SIZE); + ut_ad(size >= UNIV_ZIP_SIZE_MIN); ut_ad(size <= UNIV_PAGE_SIZE); return(size); @@ -175,12 +175,12 @@ page_zip_rec_needs_ext( ut_ad(comp || !zip_size); #if UNIV_PAGE_SIZE > REC_MAX_DATA_SIZE - if (UNIV_UNLIKELY(rec_size >= REC_MAX_DATA_SIZE)) { + if (rec_size >= REC_MAX_DATA_SIZE) { return(TRUE); } #endif - if (UNIV_UNLIKELY(zip_size)) { + if (zip_size) { ut_ad(comp); /* On a compressed page, there is a two-byte entry in the dense page directory for every record. 
But there @@ -209,7 +209,7 @@ page_zip_simple_validate( { ut_ad(page_zip); ut_ad(page_zip->data); - ut_ad(page_zip->ssize < PAGE_ZIP_NUM_SSIZE); + ut_ad(page_zip->ssize <= PAGE_ZIP_SSIZE_MAX); ut_ad(page_zip_get_size(page_zip) > PAGE_DATA + PAGE_ZIP_DIR_SLOT_SIZE); ut_ad(page_zip->m_start <= page_zip->m_end); @@ -238,11 +238,11 @@ page_zip_get_trailer_len( ut_ad(page_zip_simple_validate(page_zip)); UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); - if (UNIV_UNLIKELY(!page_is_leaf(page_zip->data))) { + if (!page_is_leaf(page_zip->data)) { uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE; ut_ad(!page_zip->n_blobs); - } else if (UNIV_UNLIKELY(is_clust)) { + } else if (is_clust) { uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN; } else { @@ -315,7 +315,7 @@ page_zip_available( space needed for identifying the record (encoded heap_no). */ length -= REC_N_NEW_EXTRA_BYTES - 2; - if (UNIV_UNLIKELY(create)) { + if (create > 0) { /* When a record is created, a pointer may be added to the dense directory. Likewise, space for the columns that will not be @@ -326,10 +326,8 @@ page_zip_available( trailer_len += uncompressed_size; } - return(UNIV_LIKELY(length - + trailer_len - + page_zip->m_end - < page_zip_get_size(page_zip))); + return(length + trailer_len + page_zip->m_end + < page_zip_get_size(page_zip)); } /**********************************************************************//** @@ -384,7 +382,7 @@ page_zip_write_header( /* The following would fail in page_cur_insert_rec_zip(). 
*/ /* ut_ad(page_zip_validate(page_zip, str - pos)); */ - if (UNIV_LIKELY_NULL(mtr)) { + if (mtr) { #ifndef UNIV_HOTBACKUP page_zip_write_header_log(str, length, mtr); #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/include/pars0grm.h b/storage/innobase/include/pars0grm.h index 3de233eed3a..8e725fe9545 100644 --- a/storage/innobase/include/pars0grm.h +++ b/storage/innobase/include/pars0grm.h @@ -1,29 +1,37 @@ -/***************************************************************************** +/* A Bison parser, made by GNU Bison 2.3. */ -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. -Copyright (c) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004 Free Software -Foundation, Inc. +/* Skeleton interface for Bison's Yacc-like parsers in C -As a special exception, when this file is copied by Bison into a -Bison output file, you may use that output file without restriction. -This special exception was added by the Free Software Foundation -in version 1.24 of Bison. + Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006 + Free Software Foundation, Inc. -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. */ -*****************************************************************************/ +/* As a special exception, you may create a larger work that contains + part or all of the Bison parser skeleton and distribute that work + under terms of your choice, so long as that work isn't itself a + parser generator using the skeleton or a modified version thereof + as a parser skeleton. Alternatively, if you modify or redistribute + the parser skeleton itself, you may (at your option) remove this + special exception, which will cause the skeleton and the resulting + Bison output files to be licensed under the GNU General Public + License without this special exception. -/* A Bison parser, made by GNU Bison 1.875d. */ + This special exception was added by the Free Software Foundation in + version 2.2 of Bison. */ /* Tokens. */ #ifndef YYTOKENTYPE @@ -123,9 +131,19 @@ Place, Suite 330, Boston, MA 02111-1307 USA PARS_LOCK_TOKEN = 347, PARS_SHARE_TOKEN = 348, PARS_MODE_TOKEN = 349, - NEG = 350 + PARS_LIKE_TOKEN = 350, + PARS_LIKE_TOKEN_EXACT = 351, + PARS_LIKE_TOKEN_PREFIX = 352, + PARS_LIKE_TOKEN_SUFFIX = 353, + PARS_LIKE_TOKEN_SUBSTR = 354, + PARS_TABLE_NAME_TOKEN = 355, + PARS_COMPACT_TOKEN = 356, + PARS_BLOCK_SIZE_TOKEN = 357, + PARS_BIGINT_TOKEN = 358, + NEG = 359 }; #endif +/* Tokens. 
*/ #define PARS_INT_LIT 258 #define PARS_FLOAT_LIT 259 #define PARS_STR_LIT 260 @@ -218,12 +236,21 @@ Place, Suite 330, Boston, MA 02111-1307 USA #define PARS_LOCK_TOKEN 347 #define PARS_SHARE_TOKEN 348 #define PARS_MODE_TOKEN 349 -#define NEG 350 +#define PARS_LIKE_TOKEN 350 +#define PARS_LIKE_TOKEN_EXACT 351 +#define PARS_LIKE_TOKEN_PREFIX 352 +#define PARS_LIKE_TOKEN_SUFFIX 353 +#define PARS_LIKE_TOKEN_SUBSTR 354 +#define PARS_TABLE_NAME_TOKEN 355 +#define PARS_COMPACT_TOKEN 356 +#define PARS_BLOCK_SIZE_TOKEN 357 +#define PARS_BIGINT_TOKEN 358 +#define NEG 359 -#if ! defined (YYSTYPE) && ! defined (YYSTYPE_IS_DECLARED) +#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED typedef int YYSTYPE; # define yystype YYSTYPE /* obsolescent; will be withdrawn */ # define YYSTYPE_IS_DECLARED 1 @@ -232,5 +259,3 @@ typedef int YYSTYPE; extern YYSTYPE yylval; - - diff --git a/storage/innobase/include/pars0opt.h b/storage/innobase/include/pars0opt.h index 42d956068f8..1084d644c90 100644 --- a/storage/innobase/include/pars0opt.h +++ b/storage/innobase/include/pars0opt.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/pars0opt.ic b/storage/innobase/include/pars0opt.ic index e0bb6bf1af2..786d911ca3d 100644 --- a/storage/innobase/include/pars0opt.ic +++ b/storage/innobase/include/pars0opt.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/pars0pars.h b/storage/innobase/include/pars0pars.h index 141b2706d7d..9eb8aeb747f 100644 --- a/storage/innobase/include/pars0pars.h +++ b/storage/innobase/include/pars0pars.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. 
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -38,7 +38,7 @@ Created 11/19/1996 Heikki Tuuri and varies in type, while 'user_arg' is a user-supplied argument. The meaning of the return type also varies. See the individual use cases, e.g. the FETCH statement, for details on them. */ -typedef void* (*pars_user_func_cb_t)(void* arg, void* user_arg); +typedef ibool (*pars_user_func_cb_t)(void* arg, void* user_arg); /** If the following is set TRUE, the parser will emit debugging information */ @@ -74,6 +74,7 @@ extern pars_res_word_t pars_distinct_token; extern pars_res_word_t pars_binary_token; extern pars_res_word_t pars_blob_token; extern pars_res_word_t pars_int_token; +extern pars_res_word_t pars_bigint_token; extern pars_res_word_t pars_char_token; extern pars_res_word_t pars_float_token; extern pars_res_word_t pars_update_token; @@ -105,13 +106,13 @@ pars_sql( pars_info_t* info, /*!< in: extra information, or NULL */ const char* str); /*!< in: SQL string */ /*************************************************************//** -Retrieves characters to the lexical analyzer. */ +Retrieves characters to the lexical analyzer. 
+@return number of characters copied or 0 on EOF */ UNIV_INTERN -void +int pars_get_lex_chars( /*===============*/ char* buf, /*!< in/out: buffer where to copy */ - int* result, /*!< out: number of characters copied or EOF */ int max_size); /*!< in: maximum number of characters which fit in the buffer */ /*************************************************************//** @@ -140,6 +141,17 @@ pars_func( /*======*/ que_node_t* res_word,/*!< in: function name reserved word */ que_node_t* arg); /*!< in: first argument in the argument list */ +/************************************************************************* +Rebind a LIKE search string. NOTE: We ignore any '%' characters embedded +within the search string. +@return own: function node in a query tree */ +UNIV_INTERN +int +pars_like_rebind( +/*=============*/ + sym_node_t* node, /* in: The search string node.*/ + const byte* ptr, /* in: literal to (re) bind */ + ulint len); /* in: length of literal to (re) bind*/ /*********************************************************************//** Parses an operator expression. @return own: function node in a query tree */ @@ -397,7 +409,10 @@ pars_create_table( sym_node_t* table_sym, /*!< in: table name node in the symbol table */ sym_node_t* column_defs, /*!< in: list of column names */ - void* not_fit_in_memory);/*!< in: a non-NULL pointer means that + sym_node_t* compact, /* in: non-NULL if COMPACT table. 
*/ + sym_node_t* block_size, /* in: block size (can be NULL) */ + void* not_fit_in_memory); + /*!< in: a non-NULL pointer means that this is a table which in simulations should be simulated as not fitting in memory; thread is put to sleep @@ -498,7 +513,76 @@ pars_info_add_str_literal( pars_info_t* info, /*!< in: info struct */ const char* name, /*!< in: name */ const char* str); /*!< in: string */ +/******************************************************************** +If the literal value already exists then it rebinds otherwise it +creates a new entry.*/ +UNIV_INTERN +void +pars_info_bind_literal( +/*===================*/ + pars_info_t* info, /* in: info struct */ + const char* name, /* in: name */ + const void* address, /* in: address */ + ulint length, /* in: length of data */ + ulint type, /* in: type, e.g. DATA_FIXBINARY */ + ulint prtype); /* in: precise type, e.g. */ +/******************************************************************** +If the literal value already exists then it rebinds otherwise it +creates a new entry.*/ +UNIV_INTERN +void +pars_info_bind_varchar_literal( +/*===========================*/ + pars_info_t* info, /*!< in: info struct */ + const char* name, /*!< in: name */ + const byte* str, /*!< in: string */ + ulint str_len); /*!< in: string length */ +/****************************************************************//** +Equivalent to: +char buf[4]; +mach_write_to_4(buf, val); +pars_info_add_literal(info, name, buf, 4, DATA_INT, 0); + +except that the buffer is dynamically allocated from the info struct's +heap. */ +UNIV_INTERN +void +pars_info_bind_int4_literal( +/*=======================*/ + pars_info_t* info, /*!< in: info struct */ + const char* name, /*!< in: name */ + const ib_uint32_t* val); /*!< in: value */ +/******************************************************************** +If the literal value already exists then it rebinds otherwise it +creates a new entry. 
*/ +UNIV_INTERN +void +pars_info_bind_int8_literal( +/*=======================*/ + pars_info_t* info, /*!< in: info struct */ + const char* name, /*!< in: name */ + const ib_uint64_t* val); /*!< in: value */ +/****************************************************************//** +Add user function. */ +UNIV_INTERN +void +pars_info_bind_function( +/*===================*/ + pars_info_t* info, /*!< in: info struct */ + const char* name, /*!< in: function name */ + pars_user_func_cb_t func, /*!< in: function address */ + void* arg); /*!< in: user-supplied argument */ +/****************************************************************//** +Add bound id. */ +UNIV_INTERN +void +pars_info_bind_id( +/*=============*/ + pars_info_t* info, /*!< in: info struct */ + ibool copy_name,/* in: make a copy of name if TRUE */ + const char* name, /*!< in: name */ + const char* id); /*!< in: id */ /****************************************************************//** Equivalent to: @@ -532,16 +616,6 @@ pars_info_add_ull_literal( pars_info_t* info, /*!< in: info struct */ const char* name, /*!< in: name */ ib_uint64_t val); /*!< in: value */ -/****************************************************************//** -Add user function. */ -UNIV_INTERN -void -pars_info_add_function( -/*===================*/ - pars_info_t* info, /*!< in: info struct */ - const char* name, /*!< in: function name */ - pars_user_func_cb_t func, /*!< in: function address */ - void* arg); /*!< in: user-supplied argument */ /****************************************************************//** Add bound id. */ @@ -619,6 +693,7 @@ struct pars_bound_lit_struct { ulint length; /*!< length of data */ ulint type; /*!< type, e.g. DATA_FIXBINARY */ ulint prtype; /*!< precise type, e.g. DATA_UNSIGNED */ + sym_node_t* node; /*!< symbol node */ }; /** Bound identifier. 
*/ @@ -638,7 +713,7 @@ is also used for some non-functions like the assignment ':=' */ struct func_node_struct{ que_common_t common; /*!< type: QUE_NODE_FUNC */ int func; /*!< token code of the function name */ - ulint class; /*!< class of the function */ + ulint fclass; /*!< class of the function */ que_node_t* args; /*!< argument(s) of the function */ UT_LIST_NODE_T(func_node_t) cond_list; /*!< list of comparison conditions; defined diff --git a/storage/innobase/include/pars0pars.ic b/storage/innobase/include/pars0pars.ic index ae6c13cd671..4c88337a265 100644 --- a/storage/innobase/include/pars0pars.ic +++ b/storage/innobase/include/pars0pars.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/pars0sym.h b/storage/innobase/include/pars0sym.h index 6d1a4b82414..4b3b342a533 100644 --- a/storage/innobase/include/pars0sym.h +++ b/storage/innobase/include/pars0sym.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. 
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -67,7 +67,7 @@ sym_node_t* sym_tab_add_str_lit( /*================*/ sym_tab_t* sym_tab, /*!< in: symbol table */ - byte* str, /*!< in: string with no quotes around + const byte* str, /*!< in: string with no quotes around it */ ulint len); /*!< in: string length */ /******************************************************************//** @@ -80,6 +80,16 @@ sym_tab_add_bound_lit( sym_tab_t* sym_tab, /*!< in: symbol table */ const char* name, /*!< in: name of bound literal */ ulint* lit_type); /*!< out: type of literal (PARS_*_LIT) */ +/********************************************************************** +Rebind literal to a node in the symbol table. */ + +sym_node_t* +sym_tab_rebind_lit( +/*===============*/ + /* out: symbol table node */ + sym_node_t* node, /* in: node that is bound to literal*/ + const void* address, /* in: pointer to data */ + ulint length); /* in: length of data */ /******************************************************************//** Adds an SQL null literal to a symbol table. @return symbol table node */ @@ -116,11 +126,14 @@ sym_tab_add_bound_id( /** Types of a symbol table node */ enum sym_tab_entry { + SYM_UNSET, /*!< Unset entry. 
*/ SYM_VAR = 91, /*!< declared parameter or local variable of a procedure */ SYM_IMPLICIT_VAR, /*!< storage for a intermediate result of a calculation */ SYM_LIT, /*!< literal */ + SYM_TABLE_REF_COUNTED, /*!< database table name, ref counted. Must + be closed explicitly. */ SYM_TABLE, /*!< database table name */ SYM_COLUMN, /*!< database table name */ SYM_CURSOR, /*!< named cursor */ @@ -210,6 +223,7 @@ struct sym_node_struct{ the symbol table */ UT_LIST_NODE_T(sym_node_t) sym_list; /*!< list of symbol nodes */ + sym_node_t* like_node; /* LIKE operator node*/ }; /** Symbol table */ diff --git a/storage/innobase/include/pars0sym.ic b/storage/innobase/include/pars0sym.ic index 9eb09db3a47..266c1a6310d 100644 --- a/storage/innobase/include/pars0sym.ic +++ b/storage/innobase/include/pars0sym.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/pars0types.h b/storage/innobase/include/pars0types.h index e0a8a86bf07..13ae53f3fd6 100644 --- a/storage/innobase/include/pars0types.h +++ b/storage/innobase/include/pars0types.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1998, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1998, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/que0que.h b/storage/innobase/include/que0que.h index 720da6dcb46..531794ce688 100644 --- a/storage/innobase/include/que0que.h +++ b/storage/innobase/include/que0que.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. 
+Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -41,14 +41,9 @@ Created 5/27/1996 Heikki Tuuri of SQL execution in the UNIV_SQL_DEBUG version */ extern ibool que_trace_on; -/***********************************************************************//** -Adds a query graph to the session's list of graphs. */ -UNIV_INTERN -void -que_graph_publish( -/*==============*/ - que_t* graph, /*!< in: graph */ - sess_t* sess); /*!< in: session */ +/** Mutex protecting the query threads. */ +extern mutex_t que_thr_mutex; + /***********************************************************************//** Creates a query graph fork node. @return own: fork node */ @@ -114,8 +109,8 @@ que_graph_free( afterwards! */ /**********************************************************************//** Stops a query thread if graph or trx is in a state requiring it. The -conditions are tested in the order (1) graph, (2) trx. The kernel mutex has -to be reserved. +conditions are tested in the order (1) graph, (2) trx. The lock_sys_t::mutex +has to be reserved. 
@return TRUE if stopped */ UNIV_INTERN ibool @@ -143,7 +138,7 @@ que_thr_stop_for_mysql_no_error( /**********************************************************************//** A patch for MySQL used to 'stop' a dummy query thread used in MySQL. The query thread is stopped and made inactive, except in the case where -it was put to the lock wait state in lock0lock.c, but the lock has already +it was put to the lock wait state in lock0lock.cc, but the lock has already been granted or the transaction chosen as a victim in deadlock resolution. */ UNIV_INTERN void @@ -158,44 +153,17 @@ que_run_threads( /*============*/ que_thr_t* thr); /*!< in: query thread */ /**********************************************************************//** -After signal handling is finished, returns control to a query graph error -handling routine. (Currently, just returns the control to the root of the -graph so that the graph can communicate an error message to the client.) */ -UNIV_INTERN -void -que_fork_error_handle( -/*==================*/ - trx_t* trx, /*!< in: trx */ - que_t* fork); /*!< in: query graph which was run before signal - handling started, NULL not allowed */ -/**********************************************************************//** -Moves a suspended query thread to the QUE_THR_RUNNING state and releases -a single worker thread to execute it. This function should be used to end +Moves a suspended query thread to the QUE_THR_RUNNING state and releases +a worker thread to execute it. This function should be used to end the wait state of a query thread waiting for a lock or a stored procedure -completion. */ +completion. 
+@return query thread instance of thread to wake up, or NULL */ UNIV_INTERN -void -que_thr_end_wait( -/*=============*/ - que_thr_t* thr, /*!< in: query thread in the - QUE_THR_LOCK_WAIT, - or QUE_THR_PROCEDURE_WAIT, or - QUE_THR_SIG_REPLY_WAIT state */ - que_thr_t** next_thr); /*!< in/out: next query thread to run; - if the value which is passed in is - a pointer to a NULL pointer, then the - calling function can start running - a new query thread */ -/**********************************************************************//** -Same as que_thr_end_wait, but no parameter next_thr available. */ -UNIV_INTERN -void -que_thr_end_wait_no_next_thr( -/*=========================*/ - que_thr_t* thr); /*!< in: query thread in the - QUE_THR_LOCK_WAIT, - or QUE_THR_PROCEDURE_WAIT, or - QUE_THR_SIG_REPLY_WAIT state */ +que_thr_t* +que_thr_end_lock_wait( +/*==================*/ + trx_t* trx); /*!< in: transaction in the + QUE_THR_LOCK_WAIT state */ /**********************************************************************//** Starts execution of a command in a query fork. Picks a query thread which is not in the QUE_THR_RUNNING state and moves it to that state. If none @@ -296,6 +264,14 @@ que_node_list_add_last( /*===================*/ que_node_t* node_list, /*!< in: node list, or NULL */ que_node_t* node); /*!< in: node */ +/************************************************************************* +Get the last node from the list.*/ +UNIV_INLINE +que_node_t* +que_node_list_get_last( +/*===================*/ + /* out: last node in the list.*/ + que_node_t* node_list); /* in: node list; must not be NULL */ /*********************************************************************//** Gets a query graph node list length. @return length, for NULL list 0 */ @@ -308,7 +284,7 @@ que_node_list_get_len( Checks if graph, trx, or session is in a state where the query thread should be stopped. 
@return TRUE if should be stopped; NOTE that if the peek is made -without reserving the kernel mutex, then another peek with the mutex +without reserving the trx_t::mutex, then another peek with the mutex reserved is necessary before deciding the actual stopping */ UNIV_INLINE ibool @@ -334,7 +310,7 @@ que_node_print_info( Evaluate the given SQL @return error code or DB_SUCCESS */ UNIV_INTERN -ulint +enum db_err que_eval_sql( /*=========*/ pars_info_t* info, /*!< in: info struct, or NULL */ @@ -344,8 +320,34 @@ que_eval_sql( dict_sys->mutex around call to pars_sql. */ trx_t* trx); /*!< in: trx */ -/* Query graph query thread node: the fields are protected by the kernel -mutex with the exceptions named below */ +/**********************************************************************//** +Round robin scheduler. +@return a query thread of the graph moved to QUE_THR_RUNNING state, or +NULL; the query thread should be executed by que_run_threads by the +caller */ +UNIV_INTERN +que_thr_t* +que_fork_scheduler_round_robin( +/*===========================*/ + que_fork_t* fork, /*!< in: a query fork */ + que_thr_t* thr); /*!< in: current pos */ + +/*********************************************************************//** +Initialise the query sub-system. */ +UNIV_INTERN +void +que_init(void); +/*==========*/ + +/*********************************************************************//** +Close the query sub-system. 
*/ +UNIV_INTERN +void +que_close(void); +/*===========*/ + +/* Query graph query thread node: the fields are protected by the +trx_t::mutex with the exceptions named below */ struct que_thr_struct{ que_common_t common; /*!< type: QUE_NODE_THR */ @@ -353,24 +355,15 @@ struct que_thr_struct{ corruption */ que_node_t* child; /*!< graph child node */ que_t* graph; /*!< graph where this node belongs */ + ulint state; /*!< state of the query thread */ ibool is_active; /*!< TRUE if the thread has been set to the run state in que_thr_move_to_run_state, but not deactivated in que_thr_dec_reference_count */ - ulint state; /*!< state of the query thread */ - UT_LIST_NODE_T(que_thr_t) - thrs; /*!< list of thread nodes of the fork - node */ - UT_LIST_NODE_T(que_thr_t) - trx_thrs; /*!< lists of threads in wait list of - the trx */ - UT_LIST_NODE_T(que_thr_t) - queue; /*!< list of runnable thread nodes in - the server task queue */ /*------------------------------*/ /* The following fields are private to the OS thread executing the - query thread, and are not protected by the kernel mutex: */ + query thread, and are not protected by any mutex: */ que_node_t* run_node; /*!< pointer to the node where the subgraph down from this node is @@ -381,6 +374,21 @@ struct que_thr_struct{ thus far */ ulint lock_state; /*!< lock state of thread (table or row) */ + struct srv_slot_struct* + slot; /* The thread slot in the wait + array in srv_sys_t */ + /*------------------------------*/ + /* The following fields are links for the various lists that + this type can be on. 
*/ + UT_LIST_NODE_T(que_thr_t) + thrs; /*!< list of thread nodes of the fork + node */ + UT_LIST_NODE_T(que_thr_t) + trx_thrs; /*!< lists of threads in wait list of + the trx */ + UT_LIST_NODE_T(que_thr_t) + queue; /*!< list of runnable thread nodes in + the server task queue */ ulint fk_cascade_depth; /*!< maximum cascading call depth supported for foreign key constraint related delete/updates */ @@ -389,7 +397,7 @@ struct que_thr_struct{ #define QUE_THR_MAGIC_N 8476583 #define QUE_THR_MAGIC_FREED 123461526 -/* Query graph fork node: its fields are protected by the kernel mutex */ +/* Query graph fork node: its fields are protected by the query thread mutex */ struct que_fork_struct{ que_common_t common; /*!< type: QUE_NODE_FORK */ que_t* graph; /*!< query graph of this node */ @@ -502,7 +510,6 @@ struct que_fork_struct{ thread has done its task */ #define QUE_THR_COMMAND_WAIT 4 #define QUE_THR_LOCK_WAIT 5 -#define QUE_THR_SIG_REPLY_WAIT 6 #define QUE_THR_SUSPENDED 7 #define QUE_THR_ERROR 8 @@ -516,7 +523,6 @@ struct que_fork_struct{ #define QUE_CUR_START 2 #define QUE_CUR_END 3 - #ifndef UNIV_NONINL #include "que0que.ic" #endif diff --git a/storage/innobase/include/que0que.ic b/storage/innobase/include/que0que.ic index bd936670e1e..eff5a86d958 100644 --- a/storage/innobase/include/que0que.ic +++ b/storage/innobase/include/que0que.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -88,7 +88,7 @@ que_node_get_type( { ut_ad(node); - return(((que_common_t*)node)->type); + return(((que_common_t*) node)->type); } /***********************************************************************//** @@ -101,7 +101,7 @@ que_node_get_val( { ut_ad(node); - return(&(((que_common_t*)node)->val)); + return(&(((que_common_t*) node)->val)); } /***********************************************************************//** @@ -115,7 +115,7 @@ que_node_get_val_buf_size( { ut_ad(node); - return(((que_common_t*)node)->val_buf_size); + return(((que_common_t*) node)->val_buf_size); } /***********************************************************************//** @@ -129,7 +129,7 @@ que_node_set_val_buf_size( { ut_ad(node); - ((que_common_t*)node)->val_buf_size = size; + ((que_common_t*) node)->val_buf_size = size; } /***********************************************************************//** @@ -143,7 +143,7 @@ que_node_set_parent( { ut_ad(node); - ((que_common_t*)node)->parent = parent; + ((que_common_t*) node)->parent = parent; } /***********************************************************************//** @@ -192,6 +192,28 @@ que_node_list_add_last( return(node_list); } +/************************************************************************* +Gets the last node from the list.*/ +UNIV_INLINE +que_node_t* +que_node_list_get_last( +/*===================*/ + /* out: last node in list.*/ + que_node_t* node_list) /* in: node list */ +{ + que_common_t* node; + + ut_a(node_list != NULL); + + node = (que_common_t*) node_list; + + /* We need the last element */ + while 
(node->brother != NULL) { + node = (que_common_t*) node->brother; + } + + return(node); +} /*********************************************************************//** Gets the next list node in a list of query graph nodes. @return next node in a list of nodes */ @@ -201,7 +223,7 @@ que_node_get_next( /*==============*/ que_node_t* node) /*!< in: node in a list */ { - return(((que_common_t*)node)->brother); + return(((que_common_t*) node)->brother); } /*********************************************************************//** @@ -236,14 +258,14 @@ que_node_get_parent( /*================*/ que_node_t* node) /*!< in: node */ { - return(((que_common_t*)node)->parent); + return(((que_common_t*) node)->parent); } /**********************************************************************//** Checks if graph, trx, or session is in a state where the query thread should be stopped. @return TRUE if should be stopped; NOTE that if the peek is made -without reserving the kernel mutex, then another peek with the mutex +without reserving the trx mutex, then another peek with the mutex reserved is necessary before deciding the actual stopping */ UNIV_INLINE ibool @@ -258,9 +280,9 @@ que_thr_peek_stop( trx = graph->trx; if (graph->state != QUE_FORK_ACTIVE - || trx->que_state == TRX_QUE_LOCK_WAIT - || (UT_LIST_GET_LEN(trx->signals) > 0 - && trx->que_state == TRX_QUE_RUNNING)) { + || trx->lock.que_state == TRX_QUE_LOCK_WAIT + || (trx->lock.que_state != TRX_QUE_ROLLING_BACK + && trx->lock.que_state != TRX_QUE_RUNNING)) { return(TRUE); } diff --git a/storage/innobase/include/que0types.h b/storage/innobase/include/que0types.h index ea976074768..b165b817d87 100644 --- a/storage/innobase/include/que0types.h +++ b/storage/innobase/include/que0types.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/read0read.h b/storage/innobase/include/read0read.h index 73ea66f4da2..6ea57fffcd2 100644 --- a/storage/innobase/include/read0read.h +++ b/storage/innobase/include/read0read.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -52,18 +52,16 @@ must be closed with ..._close. 
@return own: read view struct */ UNIV_INTERN read_view_t* -read_view_oldest_copy_or_open_new( -/*==============================*/ - trx_id_t cr_trx_id, /*!< in: trx_id of creating - transaction, or 0 used in purge */ +read_view_purge_open( +/*=================*/ mem_heap_t* heap); /*!< in: memory heap from which allocated */ /*********************************************************************//** -Closes a read view. */ +Remove read view from the trx_sys->view_list. */ UNIV_INTERN void -read_view_close( -/*============*/ +read_view_remove( +/*=============*/ read_view_t* view); /*!< in: read view */ /*********************************************************************//** Closes a consistent read view for MySQL. This function is called at an SQL @@ -145,9 +143,9 @@ struct read_view_struct{ ulint n_trx_ids; /*!< Number of cells in the trx_ids array */ trx_id_t* trx_ids;/*!< Additional trx ids which the read should - not see: typically, these are the active - transactions at the time when the read is - serialized, except the reading transaction + not see: typically, these are the read-write + active transactions at the time when the read + is serialized, except the reading transaction itself; the trx ids in this array are in a descending order. These trx_ids should be between the "low" and "high" water marks, diff --git a/storage/innobase/include/read0read.ic b/storage/innobase/include/read0read.ic index 5bb5249b591..436800e1585 100644 --- a/storage/innobase/include/read0read.ic +++ b/storage/innobase/include/read0read.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -24,36 +24,6 @@ Created 2/16/1997 Heikki Tuuri *******************************************************/ /*********************************************************************//** -Gets the nth trx id in a read view. -@return trx id */ -UNIV_INLINE -trx_id_t -read_view_get_nth_trx_id( -/*=====================*/ - const read_view_t* view, /*!< in: read view */ - ulint n) /*!< in: position */ -{ - ut_ad(n < view->n_trx_ids); - - return(*(view->trx_ids + n)); -} - -/*********************************************************************//** -Sets the nth trx id in a read view. */ -UNIV_INLINE -void -read_view_set_nth_trx_id( -/*=====================*/ - read_view_t* view, /*!< in: read view */ - ulint n, /*!< in: position */ - trx_id_t trx_id) /*!< in: trx id to set */ -{ - ut_ad(n < view->n_trx_ids); - - *(view->trx_ids + n) = trx_id; -} - -/*********************************************************************//** Checks if a read view sees the specified transaction. 
@return TRUE if sees */ UNIV_INLINE @@ -63,33 +33,34 @@ read_view_sees_trx_id( const read_view_t* view, /*!< in: read view */ trx_id_t trx_id) /*!< in: trx id */ { - ulint n_ids; - ulint i; - if (trx_id < view->up_limit_id) { return(TRUE); - } - - if (trx_id >= view->low_limit_id) { + } else if (trx_id >= view->low_limit_id) { return(FALSE); - } - - /* We go through the trx ids in the array smallest first: this order - may save CPU time, because if there was a very long running - transaction in the trx id array, its trx id is looked at first, and - the first two comparisons may well decide the visibility of trx_id. */ - - n_ids = view->n_trx_ids; - - for (i = 0; i < n_ids; i++) { - trx_id_t view_trx_id - = read_view_get_nth_trx_id(view, n_ids - i - 1); - - if (trx_id <= view_trx_id) { - return(trx_id != view_trx_id); - } + } else { + ulint lower = 0; + ulint upper = view->n_trx_ids - 1; + + ut_a(view->n_trx_ids > 0); + + do { + ulint mid = (lower + upper) >> 1; + trx_id_t mid_id = view->trx_ids[mid]; + + if (mid_id == trx_id) { + return(FALSE); + } else if (mid_id < trx_id) { + if (mid > 0) { + upper = mid - 1; + } else { + break; + } + } else { + lower = mid + 1; + } + } while (lower <= upper); } return(TRUE); diff --git a/storage/innobase/include/read0types.h b/storage/innobase/include/read0types.h index caf69e3fb51..0b6aa132b88 100644 --- a/storage/innobase/include/read0types.h +++ b/storage/innobase/include/read0types.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/rem0cmp.h b/storage/innobase/include/rem0cmp.h index a908521c9f7..ed6486aa603 100644 --- a/storage/innobase/include/rem0cmp.h +++ b/storage/innobase/include/rem0cmp.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -75,6 +75,63 @@ cmp_data_data_slow( const byte* data2, /*!< in: data field (== a pointer to a memory buffer) */ ulint len2); /*!< in: data field length or UNIV_SQL_NULL */ + +/***************************************************************** +This function is used to compare two data fields for which we know the +data type to be VARCHAR. 
+@return 1, 0, -1, if lhs is greater, equal, less than rhs, respectively */ +UNIV_INTERN +int +cmp_data_data_slow_varchar( +/*=======================*/ + const byte* lhs, /* in: data field (== a pointer to a memory + buffer) */ + ulint lhs_len,/* in: data field length or UNIV_SQL_NULL */ + const byte* rhs, /* in: data field (== a pointer to a memory + buffer) */ + ulint rhs_len);/* in: data field length or UNIV_SQL_NULL */ +/***************************************************************** +This function is used to compare two varchar/char fields. The comparison +is for the LIKE operator. +@return 1, 0, -1, if lhs is greater, equal, less than rhs, respectively */ +UNIV_INTERN +int +cmp_data_data_slow_like_prefix( +/*===========================*/ + const byte* data1, /* in: data field (== a pointer to a memory + buffer) */ + ulint len1, /* in: data field length or UNIV_SQL_NULL */ + const byte* data2, /* in: data field (== a pointer to a memory + buffer) */ + ulint len2); /* in: data field length or UNIV_SQL_NULL */ +/***************************************************************** +This function is used to compare two varchar/char fields. The comparison +is for the LIKE operator. +@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */ +UNIV_INTERN +int +cmp_data_data_slow_like_suffix( +/*===========================*/ + const byte* data1, /* in: data field (== a pointer to a memory + buffer) */ + ulint len1, /* in: data field length or UNIV_SQL_NULL */ + const byte* data2, /* in: data field (== a pointer to a memory + buffer) */ + ulint len2); /* in: data field length or UNIV_SQL_NULL */ +/***************************************************************** +This function is used to compare two varchar/char fields. The comparison +is for the LIKE operator. 
+@return 1, 0, -1, if data1 is greater, equal, less than data2, respectively */ +UNIV_INTERN +int +cmp_data_data_slow_like_substr( +/*===========================*/ + const byte* data1, /* in: data field (== a pointer to a memory + buffer) */ + ulint len1, /* in: data field length or UNIV_SQL_NULL */ + const byte* data2, /* in: data field (== a pointer to a memory + buffer) */ + ulint len2); /* in: data field length or UNIV_SQL_NULL */ /*************************************************************//** This function is used to compare two dfields where at least the first has its data type field set. @@ -192,6 +249,39 @@ cmp_rec_rec( const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */ dict_index_t* index); /*!< in: data dictionary index */ +/***************************************************************** +This function is used to compare two dfields where at least the first +has its data type field set. */ +UNIV_INTERN +int +cmp_dfield_dfield_like_prefix( +/*==========================*/ + /* out: 1, 0, -1, if dfield1 is greater, equal, + less than dfield2, respectively */ + dfield_t* dfield1,/* in: data field; must have type field set */ + dfield_t* dfield2);/* in: data field */ +/***************************************************************** +This function is used to compare two dfields where at least the first +has its data type field set. */ +UNIV_INLINE +int +cmp_dfield_dfield_like_substr( +/*==========================*/ + /* out: 1, 0, -1, if dfield1 is greater, equal, + less than dfield2, respectively */ + dfield_t* dfield1,/* in: data field; must have type field set */ + dfield_t* dfield2);/* in: data field */ +/***************************************************************** +This function is used to compare two dfields where at least the first +has its data type field set. 
*/ +UNIV_INLINE +int +cmp_dfield_dfield_like_suffix( +/*==========================*/ + /* out: 1, 0, -1, if dfield1 is greater, equal, + less than dfield2, respectively */ + dfield_t* dfield1,/* in: data field; must have type field set */ + dfield_t* dfield2);/* in: data field */ #ifndef UNIV_NONINL #include "rem0cmp.ic" diff --git a/storage/innobase/include/rem0cmp.ic b/storage/innobase/include/rem0cmp.ic index 63415fe7837..67a2dcacba1 100644 --- a/storage/innobase/include/rem0cmp.ic +++ b/storage/innobase/include/rem0cmp.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -43,6 +43,60 @@ cmp_data_data( return(cmp_data_data_slow(mtype, prtype, data1, len1, data2, len2)); } +/***************************************************************** +This function is used to compare two (CHAR) data fields for the LIKE +operator. 
*/ +UNIV_INLINE +int +cmp_data_data_like_prefix( +/*======================*/ + /* out: 1, 0, -1, if data1 is greater, equal, + less than data2, respectively */ + byte* data1, /* in: data field (== a pointer to a memory + buffer) */ + ulint len1, /* in: data field length or UNIV_SQL_NULL */ + byte* data2, /* in: data field (== a pointer to a memory + buffer) */ + ulint len2) /* in: data field length or UNIV_SQL_NULL */ +{ + return(cmp_data_data_slow_like_prefix(data1, len1, data2, len2)); +} +/***************************************************************** +This function is used to compare two (CHAR) data fields for the LIKE +operator. */ +UNIV_INLINE +int +cmp_data_data_like_suffix( +/*======================*/ + /* out: 1, 0, -1, if data1 is greater, equal, + less than data2, respectively */ + byte* data1, /* in: data field (== a pointer to a memory + buffer) */ + ulint len1, /* in: data field length or UNIV_SQL_NULL */ + byte* data2, /* in: data field (== a pointer to a memory + buffer) */ + ulint len2) /* in: data field length or UNIV_SQL_NULL */ +{ + return(cmp_data_data_slow_like_suffix(data1, len1, data2, len2)); +} +/***************************************************************** +This function is used to compare two (CHAR) data fields for the LIKE +operator. */ +UNIV_INLINE +int +cmp_data_data_like_substr( +/*======================*/ + /* out: 1, 0, -1, if data1 is greater, equal, + less than data2, respectively */ + byte* data1, /* in: data field (== a pointer to a memory + buffer) */ + ulint len1, /* in: data field length or UNIV_SQL_NULL */ + byte* data2, /* in: data field (== a pointer to a memory + buffer) */ + ulint len2) /* in: data field length or UNIV_SQL_NULL */ +{ + return(cmp_data_data_slow_like_substr(data1, len1, data2, len2)); +} /*************************************************************//** This function is used to compare two dfields where at least the first has its data type field set. 
@@ -68,6 +122,47 @@ cmp_dfield_dfield( dfield_get_len(dfield2))); } +/***************************************************************** +This function is used to compare two dfields where at least the first +has its data type field set. */ +UNIV_INLINE +int +cmp_dfield_dfield_like_suffix( +/*==========================*/ + /* out: 1, 0, -1, if dfield1 is greater, equal, + less than dfield2, respectively */ + dfield_t* dfield1,/* in: data field; must have type field set */ + dfield_t* dfield2)/* in: data field */ +{ + ut_ad(dfield_check_typed(dfield1)); + + return(cmp_data_data_like_suffix( + (byte*) dfield_get_data(dfield1), + dfield_get_len(dfield1), + (byte*) dfield_get_data(dfield2), + dfield_get_len(dfield2))); +} + +/***************************************************************** +This function is used to compare two dfields where at least the first +has its data type field set. */ +UNIV_INLINE +int +cmp_dfield_dfield_like_substr( +/*==========================*/ + /* out: 1, 0, -1, if dfield1 is greater, equal, + less than dfield2, respectively */ + dfield_t* dfield1,/* in: data field; must have type field set */ + dfield_t* dfield2)/* in: data field */ +{ + ut_ad(dfield_check_typed(dfield1)); + + return(cmp_data_data_like_substr( + (byte*) dfield_get_data(dfield1), + dfield_get_len(dfield1), + (byte*) dfield_get_data(dfield2), + dfield_get_len(dfield2))); +} /*************************************************************//** This function is used to compare two physical records. Only the common first fields are compared. diff --git a/storage/innobase/include/rem0rec.h b/storage/innobase/include/rem0rec.h index 10b74d18c13..c6c70bb5f09 100644 --- a/storage/innobase/include/rem0rec.h +++ b/storage/innobase/include/rem0rec.h @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -54,7 +54,7 @@ in addition to the data and the offsets */ #define REC_STATUS_INFIMUM 2 #define REC_STATUS_SUPREMUM 3 -/* The following four constants are needed in page0zip.c in order to +/* The following four constants are needed in page0zip.cc in order to efficiently compress and decompress pages. */ /* The offset of heap_no in a compact record */ @@ -480,7 +480,7 @@ ulint rec_offs_any_extern( /*================*/ const ulint* offsets);/*!< in: array returned by rec_get_offsets() */ -#ifdef UNIV_BLOB_NULL_DEBUG +#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG /******************************************************//** Determine if the offsets are for a record containing null BLOB pointers. @return first field containing a null BLOB pointer, or NULL if none found */ @@ -491,7 +491,7 @@ rec_offs_any_null_extern( const rec_t* rec, /*!< in: record */ const ulint* offsets) /*!< in: rec_get_offsets(rec) */ __attribute__((nonnull, warn_unused_result)); -#endif /* UNIV_BLOB_NULL_DEBUG */ +#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ /******************************************************//** Returns nonzero if the extern bit is set in nth field of rec. @return nonzero if externally stored */ @@ -542,7 +542,11 @@ rec_set_nth_field( const ulint* offsets,/*!< in: array returned by rec_get_offsets() */ ulint n, /*!< in: index number of the field */ const void* data, /*!< in: pointer to the data if not SQL null */ - ulint len); /*!< in: length of the data or UNIV_SQL_NULL */ + ulint len); /*!< in: length of the data or UNIV_SQL_NULL. 
+ If not SQL null, must have the same + length as the previous value. + If SQL null, previous value must be + SQL null. */ /**********************************************************//** The following function returns the data size of an old-style physical record, that is the sum of field lengths. SQL null fields diff --git a/storage/innobase/include/rem0rec.ic b/storage/innobase/include/rem0rec.ic index dc8ed515c30..6950263fe81 100644 --- a/storage/innobase/include/rem0rec.ic +++ b/storage/innobase/include/rem0rec.ic @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -103,7 +103,7 @@ and the shift needed to obtain each bit-field of the record. 
*/ #define REC_OLD_HEAP_NO 5 #define REC_HEAP_NO_MASK 0xFFF8UL -#if 0 /* defined in rem0rec.h for use of page0zip.c */ +#if 0 /* defined in rem0rec.h for use of page0zip.cc */ #define REC_NEW_HEAP_NO 4 #define REC_HEAP_NO_SHIFT 3 #endif @@ -264,12 +264,12 @@ rec_get_next_ptr_const( field_value = mach_read_from_2(rec - REC_NEXT); - if (UNIV_UNLIKELY(field_value == 0)) { + if (field_value == 0) { return(NULL); } - if (UNIV_EXPECT(comp, REC_OFFS_COMPACT)) { + if (comp) { #if UNIV_PAGE_SIZE <= 32768 /* Note that for 64 KiB pages, field_value can 'wrap around' and the debug assertion is not valid */ @@ -337,7 +337,7 @@ rec_get_next_offs( field_value = mach_read_from_2(rec - REC_NEXT); - if (UNIV_EXPECT(comp, REC_OFFS_COMPACT)) { + if (comp) { #if UNIV_PAGE_SIZE <= 32768 /* Note that for 64 KiB pages, field_value can 'wrap around' and the debug assertion is not valid */ @@ -354,7 +354,7 @@ rec_get_next_offs( + ut_align_offset(rec, UNIV_PAGE_SIZE) < UNIV_PAGE_SIZE); #endif - if (UNIV_UNLIKELY(field_value == 0)) { + if (field_value == 0) { return(0); } @@ -410,7 +410,7 @@ rec_set_next_offs_new( ut_ad(rec); ut_ad(UNIV_PAGE_SIZE > next); - if (UNIV_UNLIKELY(!next)) { + if (!next) { field_value = 0; } else { /* The following two statements calculate @@ -418,7 +418,7 @@ rec_set_next_offs_new( as a non-negative number */ field_value = (ulint) - ((lint) next + ((lint) next - (lint) ut_align_offset(rec, UNIV_PAGE_SIZE)); field_value &= REC_NEXT_MASK; } @@ -572,9 +572,7 @@ rec_set_n_owned_new( { rec_set_bit_field_1(rec, n_owned, REC_NEW_N_OWNED, REC_N_OWNED_MASK, REC_N_OWNED_SHIFT); - if (UNIV_LIKELY_NULL(page_zip) - && UNIV_LIKELY(rec_get_status(rec) - != REC_STATUS_SUPREMUM)) { + if (page_zip && rec_get_status(rec) != REC_STATUS_SUPREMUM) { page_zip_rec_set_owned(page_zip, rec, n_owned); } } @@ -648,7 +646,7 @@ rec_get_info_and_status_bits( & (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT) # error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap" #endif - if (UNIV_EXPECT(comp, 
REC_OFFS_COMPACT)) { + if (comp) { bits = rec_get_info_bits(rec, TRUE) | rec_get_status(rec); } else { bits = rec_get_info_bits(rec, FALSE); @@ -684,16 +682,14 @@ rec_get_deleted_flag( const rec_t* rec, /*!< in: physical record */ ulint comp) /*!< in: nonzero=compact page format */ { - if (UNIV_EXPECT(comp, REC_OFFS_COMPACT)) { - return(UNIV_UNLIKELY( - rec_get_bit_field_1(rec, REC_NEW_INFO_BITS, - REC_INFO_DELETED_FLAG, - REC_INFO_BITS_SHIFT))); + if (comp) { + return(rec_get_bit_field_1(rec, REC_NEW_INFO_BITS, + REC_INFO_DELETED_FLAG, + REC_INFO_BITS_SHIFT)); } else { - return(UNIV_UNLIKELY( - rec_get_bit_field_1(rec, REC_OLD_INFO_BITS, - REC_INFO_DELETED_FLAG, - REC_INFO_BITS_SHIFT))); + return(rec_get_bit_field_1(rec, REC_OLD_INFO_BITS, + REC_INFO_DELETED_FLAG, + REC_INFO_BITS_SHIFT)); } } @@ -741,7 +737,7 @@ rec_set_deleted_flag_new( rec_set_info_bits_new(rec, val); - if (UNIV_LIKELY_NULL(page_zip)) { + if (page_zip) { page_zip_rec_set_deleted(page_zip, rec, flag); } } @@ -1041,7 +1037,7 @@ rec_get_nth_field_offs( ut_ad(n < rec_offs_n_fields(offsets)); ut_ad(len); - if (UNIV_UNLIKELY(n == 0)) { + if (n == 0) { offs = 0; } else { offs = rec_offs_base(offsets)[n] & REC_OFFS_MASK; @@ -1085,10 +1081,10 @@ rec_offs_any_extern( const ulint* offsets)/*!< in: array returned by rec_get_offsets() */ { ut_ad(rec_offs_validate(NULL, NULL, offsets)); - return(UNIV_UNLIKELY(*rec_offs_base(offsets) & REC_OFFS_EXTERNAL)); + return(*rec_offs_base(offsets) & REC_OFFS_EXTERNAL); } -#ifdef UNIV_BLOB_NULL_DEBUG +#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG /******************************************************//** Determine if the offsets are for a record containing null BLOB pointers. 
@return first field containing a null BLOB pointer, or NULL if none found */ @@ -1124,7 +1120,7 @@ rec_offs_any_null_extern( return(NULL); } -#endif /* UNIV_BLOB_NULL_DEBUG */ +#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ /******************************************************//** Returns nonzero if the extern bit is set in nth field of rec. @@ -1138,8 +1134,7 @@ rec_offs_nth_extern( { ut_ad(rec_offs_validate(NULL, NULL, offsets)); ut_ad(n < rec_offs_n_fields(offsets)); - return(UNIV_UNLIKELY(rec_offs_base(offsets)[1 + n] - & REC_OFFS_EXTERNAL)); + return(rec_offs_base(offsets)[1 + n] & REC_OFFS_EXTERNAL); } /******************************************************//** @@ -1154,8 +1149,7 @@ rec_offs_nth_sql_null( { ut_ad(rec_offs_validate(NULL, NULL, offsets)); ut_ad(n < rec_offs_n_fields(offsets)); - return(UNIV_UNLIKELY(rec_offs_base(offsets)[1 + n] - & REC_OFFS_SQL_NULL)); + return(rec_offs_base(offsets)[1 + n] & REC_OFFS_SQL_NULL); } /******************************************************//** @@ -1394,7 +1388,7 @@ rec_set_nth_field( ut_ad(rec); ut_ad(rec_offs_validate(rec, NULL, offsets)); - if (UNIV_UNLIKELY(len == UNIV_SQL_NULL)) { + if (len == UNIV_SQL_NULL) { if (!rec_offs_nth_sql_null(offsets, n)) { ut_a(!rec_offs_comp(offsets)); rec_set_nth_field_sql_null(rec, n); @@ -1554,7 +1548,7 @@ rec_copy( ut_memcpy(buf, rec - extra_len, extra_len + data_len); - return((byte*)buf + extra_len); + return((byte*) buf + extra_len); } /**********************************************************//** @@ -1596,7 +1590,7 @@ rec_get_converted_size( ut_ad(dtuple); ut_ad(dtuple_check_typed(dtuple)); - ut_ad(index->type & DICT_UNIVERSAL + ut_ad(dict_index_is_univ(index) || dtuple_get_n_fields(dtuple) == (((dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK) == REC_STATUS_NODE_PTR) @@ -1616,6 +1610,41 @@ rec_get_converted_size( extra_size = rec_get_converted_extra_size( data_size, dtuple_get_n_fields(dtuple), n_ext); +#if 0 + /* This code is inactive since it may be the wrong 
place to add + in the size of node pointers used in parent pages AND it is not + currently needed since ha_innobase::max_supported_key_length() + ensures that the key size limit for each page size is well below + the actual limit ((free space on page / 4) - record overhead). + But those limits will need to be raised when InnoDB can + support multiple page sizes. At that time, we will need + to consider the node pointer on these universal btrees. */ + + if (dict_index_is_univ(index)) { + /* This is for the insert buffer B-tree. + All fields in the leaf tuple ascend to the + parent node plus the child page pointer. */ + + /* ibuf cannot contain externally stored fields */ + ut_ad(n_ext == 0); + + /* Add the data pointer and recompute extra_size + based on one more field. */ + data_size += REC_NODE_PTR_SIZE; + extra_size = rec_get_converted_extra_size( + data_size, + dtuple_get_n_fields(dtuple) + 1, + 0); + + /* Be sure dtuple->n_fields has this node ptr + accounted for. This function should correspond to + what rec_convert_dtuple_to_rec() needs in storage. + In optimistic insert or update-not-in-place, we will + have to ensure that if the record is converted to a + node pointer, it will not become too large.*/ + } +#endif + return(data_size + extra_size); } diff --git a/storage/innobase/include/rem0types.h b/storage/innobase/include/rem0types.h index 7afd595be90..2f1ead43c07 100644 --- a/storage/innobase/include/rem0types.h +++ b/storage/innobase/include/rem0types.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -45,10 +45,21 @@ This constant MUST NOT BE CHANGED, or the compatibility of InnoDB data files would be at risk! */ #define REC_ANTELOPE_MAX_INDEX_COL_LEN 768 -/** Maximum indexed field length for table format DICT_TF_FORMAT_ZIP and +/** Maximum indexed field length for table format UNIV_FORMAT_B and beyond. This (3072) is the maximum index row length allowed, so we cannot create index prefix column longer than that. */ #define REC_VERSION_56_MAX_INDEX_COL_LEN 3072 +/** Innodb row types are a subset of the MySQL global enum row_type. +They are made into their own enum so that switch statements can account +for each of them. 
*/ +enum rec_format_enum { + REC_FORMAT_REDUNDANT = 0, /*!< REDUNDANT row format */ + REC_FORMAT_COMPACT = 1, /*!< COMPACT row format */ + REC_FORMAT_COMPRESSED = 2, /*!< COMPRESSED row format */ + REC_FORMAT_DYNAMIC = 3 /*!< DYNAMIC row format */ +}; +typedef enum rec_format_enum rec_format_t; + #endif diff --git a/storage/innobase/include/row0ext.h b/storage/innobase/include/row0ext.h index 557da2c4a82..60aaf16c09a 100644 --- a/storage/innobase/include/row0ext.h +++ b/storage/innobase/include/row0ext.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/row0ext.ic b/storage/innobase/include/row0ext.ic index 466046b2821..39e150d91d5 100644 --- a/storage/innobase/include/row0ext.ic +++ b/storage/innobase/include/row0ext.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -48,7 +48,7 @@ row_ext_lookup_ith( ut_ad(*len <= ext->max_len); ut_ad(ext->max_len > 0); - if (UNIV_UNLIKELY(*len == 0)) { + if (*len == 0) { /* The BLOB could not be fetched to the cache. */ return(field_ref_zero); } else { diff --git a/storage/innobase/include/row0ftsort.h b/storage/innobase/include/row0ftsort.h new file mode 100644 index 00000000000..cc5efea026f --- /dev/null +++ b/storage/innobase/include/row0ftsort.h @@ -0,0 +1,287 @@ +/***************************************************************************** + +Copyright (c) 2010, 2011, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file include/row0ftsort.h +Create Full Text Index with (parallel) merge sort + +Created 10/13/2010 Jimmy Yang +*******************************************************/ + +#ifndef row0ftsort_h +#define row0ftsort_h + +#include "univ.i" +#include "data0data.h" +#include "dict0types.h" +#include "row0mysql.h" +#include "fts0fts.h" +#include "fts0types.h" +#include "fts0priv.h" +#include "row0merge.h" + +/** This structure defines information the scan thread will fetch +and put to the linked list for parallel tokenization/sort threads +to process */ +typedef struct fts_doc_item fts_doc_item_t; + +/** One document item (document field and Doc ID) in the doc item list */ +struct fts_doc_item { + dfield_t* field; /*!< field contains document string */ + doc_id_t doc_id; /*!< document ID */ + UT_LIST_NODE_T(fts_doc_item_t) doc_list; + /*!< list of doc items */ +}; + +/** This defines the list type that scan thread would feed the parallel +tokenization threads and sort threads. 
*/ +typedef UT_LIST_BASE_NODE_T(fts_doc_item_t) fts_doc_list_t; + +#define FTS_NUM_AUX_INDEX 6 +#define FTS_PLL_MERGE 1 + +/** Sort information passed to each individual parallel sort thread */ +typedef struct fts_psort_struct fts_psort_t; + +/** Common info passed to each parallel sort thread */ +struct fts_psort_common_struct { + struct TABLE* table; /*!< MySQL table */ + dict_table_t* new_table; /*!< source table */ + trx_t* trx; /*!< transaction */ + dict_index_t* sort_index; /*!< FTS index */ + fts_psort_t* all_info; /*!< all parallel sort info */ + os_event_t sort_event; /*!< sort event */ + ibool opt_doc_id_size;/*!< whether to use 4 bytes + instead of 8 bytes integer to + store Doc ID during sort, if + Doc ID will not be big enough + to use 8 bytes value */ +}; + +typedef struct fts_psort_common_struct fts_psort_common_t; + +struct fts_psort_struct { + ulint psort_id; /*!< Parallel sort ID */ + row_merge_buf_t* merge_buf[FTS_NUM_AUX_INDEX]; + /*!< sort buffer */ + merge_file_t* merge_file[FTS_NUM_AUX_INDEX]; + /*!< sort file */ + row_merge_block_t* merge_block[FTS_NUM_AUX_INDEX]; + /*!< buffer to write to file */ + row_merge_block_t* block_alloc[FTS_NUM_AUX_INDEX]; + /*!< buffer to allocated */ + ulint child_status; /*!< child thread status */ + ulint state; /*!< child thread state */ + fts_doc_list_t fts_doc_list; /*!< doc list to process */ + fts_psort_common_t* psort_common; /*!< ptr to all psort info */ +}; + +/** Structure stores information from string tokenization operation */ +struct fts_tokenize_ctx { + ulint processed_len; /*!< processed string length */ + ulint init_pos; /*!< doc start position */ + ulint buf_used; /*!< the sort buffer (ID) when + tokenization stops, which + could due to sort buffer full */ + ulint rows_added[FTS_NUM_AUX_INDEX]; + /*!< number of rows added for + each FTS index partition */ + ib_rbt_t* cached_stopword;/*!< in: stopword list */ + dfield_t sort_field[FTS_NUM_FIELDS_SORT]; + /*!< in: sort field */ +}; + +typedef 
struct fts_tokenize_ctx fts_tokenize_ctx_t; + +/** Structure stores information needed for the insertion phase of FTS +parallel sort. */ +struct fts_psort_insert { + trx_t* trx; /*!< Transaction used for insertion */ + que_t** ins_graph; /*!< insert graph */ + fts_table_t fts_table; /*!< auxiliary table */ + CHARSET_INFO* charset; /*!< charset info */ + mem_heap_t* heap; /*!< heap */ + ibool opt_doc_id_size;/*!< Whether to use smaller (4 bytes) + integer for Doc ID */ +}; + +typedef struct fts_psort_insert fts_psort_insert_t; + + +/** status bit used for communication between parent and child thread */ +#define FTS_PARENT_COMPLETE 1 +#define FTS_CHILD_COMPLETE 1 + +/** Print some debug information */ +#define FTSORT_PRINT + +#ifdef FTSORT_PRINT +#define DEBUG_FTS_SORT_PRINT(str) \ + do { \ + ut_print_timestamp(stderr); \ + fprintf(stderr, str); \ + } while (0) +#else +#define DEBUG_FTS_SORT_PRINT(str) +#endif /* FTSORT_PRINT */ + +/*************************************************************//** +Create a temporary "fts sort index" used to merge sort the +tokenized doc string. The index has three "fields": + +1) Tokenized word, +2) Doc ID +3) Word's position in original 'doc'. + +@return dict_index_t structure for the fts sort index */ +UNIV_INTERN +dict_index_t* +row_merge_create_fts_sort_index( +/*============================*/ + dict_index_t* index, /*!< in: Original FTS index + based on which this sort index + is created */ + const dict_table_t* table, /*!< in: table that FTS index + is being created on */ + ibool* opt_doc_id_size); + /*!< out: whether to use 4 bytes + instead of 8 bytes integer to + store Doc ID during sort */ + +/********************************************************************//** +Initialize FTS parallel sort structures. 
+@return TRUE if all successful */ +UNIV_INTERN +ibool +row_fts_psort_info_init( +/*====================*/ + trx_t* trx, /*!< in: transaction */ + struct TABLE* table, /*!< in: MySQL table object */ + const dict_table_t* new_table,/*!< in: table where indexes are + created */ + dict_index_t* index, /*!< in: FTS index to be created */ + ibool opt_doc_id_size, + /*!< in: whether to use 4 bytes + instead of 8 bytes integer to + store Doc ID during sort */ + fts_psort_t** psort, /*!< out: parallel sort info to be + instantiated */ + fts_psort_t** merge); /*!< out: parallel merge info + to be instantiated */ +/********************************************************************//** +Clean up and deallocate FTS parallel sort structures, and close +temporary merge sort files */ +UNIV_INTERN +void +row_fts_psort_info_destroy( +/*=======================*/ + fts_psort_t* psort_info, /*!< parallel sort info */ + fts_psort_t* merge_info); /*!< parallel merge info */ +/********************************************************************//** +Free up merge buffers when merge sort is done */ +UNIV_INTERN +void +row_fts_free_pll_merge_buf( +/*=======================*/ + fts_psort_t* psort_info); /*!< in: parallel sort info */ + +/*********************************************************************//** +Function performs parallel tokenization of the incoming doc strings. +@return OS_THREAD_DUMMY_RETURN */ +UNIV_INTERN +os_thread_ret_t +fts_parallel_tokenization( +/*======================*/ + void* arg); /*!< in: psort_info for the thread */ +/*********************************************************************//** +Start the parallel tokenization and parallel merge sort */ +UNIV_INTERN +void +row_fts_start_psort( +/*================*/ + fts_psort_t* psort_info); /*!< in: parallel sort info */ +/*********************************************************************//** +Function performs the merge and insertion of the sorted records. 
+@return OS_THREAD_DUMMY_RETURN */ +UNIV_INTERN +os_thread_ret_t +fts_parallel_merge( +/*===============*/ + void* arg); /*!< in: parallel merge info */ +/*********************************************************************//** +Kick off the parallel merge and insert thread */ +UNIV_INTERN +void +row_fts_start_parallel_merge( +/*=========================*/ + fts_psort_t* merge_info); /*!< in: parallel sort info */ +/********************************************************************//** +Insert processed FTS data to the auxiliary tables. +@return DB_SUCCESS if insertion runs fine */ +UNIV_INTERN +ulint +row_merge_write_fts_word( +/*=====================*/ + trx_t* trx, /*!< in: transaction */ + que_t** ins_graph, /*!< in: Insert query graphs */ + fts_tokenizer_word_t*word, /*!< in: sorted and tokenized + word */ + fts_table_t* fts_table, /*!< in: fts aux table instance */ + CHARSET_INFO* charset); /*!< in: charset */ +/********************************************************************//** +Read sorted FTS data files and insert data tuples to auxiliary tables. +@return DB_SUCCESS or error number */ +UNIV_INTERN +void +row_fts_insert_tuple( +/*=================*/ + fts_psort_insert_t* + ins_ctx, /*!< in: insert context */ + fts_tokenizer_word_t* word, /*!< in: last processed + tokenized word */ + ib_vector_t* positions, /*!< in: word position */ + doc_id_t* in_doc_id, /*!< in: last item doc id */ + dtuple_t* dtuple); /*!< in: entry to insert */ +/********************************************************************//** +Propagate a newly added record up one level in the selection tree +@return parent where this value propagated to */ +UNIV_INTERN +int +row_merge_fts_sel_propagate( +/*========================*/ + int propogated, /*!< in: tree node propagated */ + int* sel_tree, /*!< in: selection tree */ + ulint level, /*!< in: selection tree level */ + const mrec_t** mrec, /*!< in: sort record */ + ulint** offsets, /*!< 
in: record offsets */ + dict_index_t* index); /*!< in: FTS index */ +/********************************************************************//** +Read sorted file containing index data tuples and insert these data +tuples to the index +@return DB_SUCCESS or error number */ +UNIV_INTERN +ulint +row_fts_merge_insert( +/*=================*/ + dict_index_t* index, /*!< in: index */ + dict_table_t* table, /*!< in: new table */ + fts_psort_t* psort_info, /*!< parallel sort info */ + ulint id); /*!< in: which auxiliary table's data + to insert to */ + +#endif /* row0ftsort_h */ diff --git a/storage/innobase/include/row0ins.h b/storage/innobase/include/row0ins.h index 810973e61a7..54ad7241a4f 100644 --- a/storage/innobase/include/row0ins.h +++ b/storage/innobase/include/row0ins.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/row0ins.ic b/storage/innobase/include/row0ins.ic index 84f6da255bf..9c191d869a2 100644 --- a/storage/innobase/include/row0ins.ic +++ b/storage/innobase/include/row0ins.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/row0merge.h b/storage/innobase/include/row0merge.h index be7c77e7724..c4e2f5ddf41 100644 --- a/storage/innobase/include/row0merge.h +++ b/storage/innobase/include/row0merge.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2005, 2010, Innobase Oy. All Rights Reserved. 
+Copyright (c) 2005, 2010, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -38,6 +38,58 @@ Created 13/06/2005 Jan Lindstrom #include "btr0types.h" #include "row0mysql.h" #include "lock0types.h" +#include "srv0srv.h" + +/** @brief Block size for I/O operations in merge sort. + +The minimum is UNIV_PAGE_SIZE, or page_get_free_space_of_empty() +rounded to a power of 2. + +When not creating a PRIMARY KEY that contains column prefixes, this +can be set as small as UNIV_PAGE_SIZE / 2. See the comment above +ut_ad(data_size < sizeof(row_merge_block_t)). */ +typedef byte row_merge_block_t; + +/** @brief Secondary buffer for I/O operations of merge records. + +This buffer is used for writing or reading a record that spans two +row_merge_block_t. Thus, it must be able to hold one merge record, +whose maximum size is the same as the minimum size of +row_merge_block_t. */ +typedef byte mrec_buf_t[UNIV_PAGE_SIZE_MAX]; + +/** @brief Merge record in row_merge_block_t. + +The format is the same as a record in ROW_FORMAT=COMPACT with the +exception that the REC_N_NEW_EXTRA_BYTES are omitted. */ +typedef byte mrec_t; + +/** Buffer for sorting in main memory. 
*/ +struct row_merge_buf_struct { + mem_heap_t* heap; /*!< memory heap where allocated */ + dict_index_t* index; /*!< the index the tuples belong to */ + ulint total_size; /*!< total amount of data bytes */ + ulint n_tuples; /*!< number of data tuples */ + ulint max_tuples; /*!< maximum number of data tuples */ + const dfield_t**tuples; /*!< array of pointers to + arrays of fields that form + the data tuples */ + const dfield_t**tmp_tuples; /*!< temporary copy of tuples, + for sorting */ +}; + +/** Buffer for sorting in main memory. */ +typedef struct row_merge_buf_struct row_merge_buf_t; + +/** Information about temporary files used in merge sort */ +struct merge_file_struct { + int fd; /*!< file descriptor */ + ulint offset; /*!< file offset (end of file) */ + ib_uint64_t n_rec; /*!< number of records in the file */ +}; + +/** Information about temporary files used in merge sort */ +typedef struct merge_file_struct merge_file_t; /** Index field definition */ struct merge_index_field_struct { @@ -47,7 +99,7 @@ struct merge_index_field_struct { }; /** Index field definition */ -typedef struct merge_index_field_struct merge_index_field_t; +typedef struct merge_index_field_struct merge_index_field_t; /** Definition of an index being created */ struct merge_index_def_struct { @@ -60,7 +112,17 @@ struct merge_index_def_struct { }; /** Definition of an index being created */ -typedef struct merge_index_def_struct merge_index_def_t; +typedef struct merge_index_def_struct merge_index_def_t; + +/** Structure for reporting duplicate records. */ +struct row_merge_dup_struct { + const dict_index_t* index; /*!< index being sorted */ + struct TABLE* table; /*!< MySQL table object */ + ulint n_dup; /*!< number of duplicates */ +}; + +/** Structure for reporting duplicate records. 
*/ +typedef struct row_merge_dup_struct row_merge_dup_t; /*********************************************************************//** Sets an exclusive lock on a table, for the duration of creating indexes. @@ -95,7 +157,8 @@ row_merge_drop_indexes( trx_t* trx, /*!< in: transaction */ dict_table_t* table, /*!< in: table containing the indexes */ dict_index_t** index, /*!< in: indexes to drop */ - ulint num_created); /*!< in: number of elements in index[] */ + ulint num_created); /*!< in: number of elements in + index[] */ /*********************************************************************//** Drop all partially created indexes during crash recovery. */ UNIV_INTERN @@ -117,7 +180,6 @@ row_merge_rename_tables( old_table->name */ const char* tmp_name, /*!< in: new name for old_table */ trx_t* trx); /*!< in: transaction handle */ - /*********************************************************************//** Create a temporary table for creating a primary key, using the definition of an existing table. @@ -173,7 +235,6 @@ row_merge_drop_table( /*=================*/ trx_t* trx, /*!< in: transaction */ dict_table_t* table); /*!< in: table instance to drop */ - /*********************************************************************//** Build indexes on a table by reading a clustered index, creating a temporary file containing index entries, merge sorting @@ -194,4 +255,130 @@ row_merge_build_indexes( struct TABLE* table); /*!< in/out: MySQL table, for reporting erroneous key value if applicable */ +/********************************************************************//** +Write a buffer to a block. */ +UNIV_INTERN +void +row_merge_buf_write( +/*================*/ + const row_merge_buf_t* buf, /*!< in: sorted buffer */ + const merge_file_t* of, /*!< in: output file */ + row_merge_block_t* block); /*!< out: buffer for writing to file */ +/********************************************************************//** +Sort a buffer. 
*/ +UNIV_INTERN +void +row_merge_buf_sort( +/*===============*/ + row_merge_buf_t* buf, /*!< in/out: sort buffer */ + row_merge_dup_t* dup); /*!< in/out: for reporting duplicates */ +/********************************************************************//** +Write a merge block to the file system. +@return TRUE if request was successful, FALSE if fail */ +UNIV_INTERN +ibool +row_merge_write( +/*============*/ + int fd, /*!< in: file descriptor */ + ulint offset, /*!< in: offset where to write, + in number of row_merge_block_t elements */ + const void* buf); /*!< in: data */ +/********************************************************************//** +Empty a sort buffer. +@return sort buffer */ +UNIV_INTERN +row_merge_buf_t* +row_merge_buf_empty( +/*================*/ + row_merge_buf_t* buf); /*!< in,own: sort buffer */ +/*********************************************************************//** +Create a merge file. */ +UNIV_INTERN +void +row_merge_file_create( +/*==================*/ + merge_file_t* merge_file); /*!< out: merge file structure */ +/*********************************************************************//** +Merge disk files. +@return DB_SUCCESS or error code */ +UNIV_INTERN +ulint +row_merge_sort( +/*===========*/ + trx_t* trx, /*!< in: transaction */ + const dict_index_t* index, /*!< in: index being created */ + merge_file_t* file, /*!< in/out: file containing + index entries */ + row_merge_block_t* block, /*!< in/out: 3 buffers */ + int* tmpfd, /*!< in/out: temporary file handle */ + struct TABLE* table); /*!< in/out: MySQL table, for + reporting erroneous key value + if applicable */ +/*********************************************************************//** +Allocate a sort buffer. +@return own: sort buffer */ +UNIV_INTERN +row_merge_buf_t* +row_merge_buf_create( +/*=================*/ + dict_index_t* index); /*!< in: secondary index */ +/*********************************************************************//** +Deallocate a sort buffer. 
*/ +UNIV_INTERN +void +row_merge_buf_free( +/*===============*/ + row_merge_buf_t* buf); /*!< in,own: sort buffer, to be freed */ +/*********************************************************************//** +Destroy a merge file. */ +UNIV_INTERN +void +row_merge_file_destroy( +/*===================*/ + merge_file_t* merge_file); /*!< out: merge file structure */ +/*********************************************************************//** +Compare two merge records. +@return 1, 0, -1 if mrec1 is greater, equal, less, respectively, than mrec2 */ +UNIV_INTERN +int +row_merge_cmp( +/*==========*/ + const mrec_t* mrec1, /*!< in: first merge + record to be compared */ + const mrec_t* mrec2, /*!< in: second merge + record to be compared */ + const ulint* offsets1, /*!< in: first record offsets */ + const ulint* offsets2, /*!< in: second record offsets */ + const dict_index_t* index, /*!< in: index */ + ibool* null_eq); /*!< out: set to TRUE if + found matching null values */ +/********************************************************************//** +Read a merge block from the file system. +@return TRUE if request was successful, FALSE if fail */ +UNIV_INTERN +ibool +row_merge_read( +/*===========*/ + int fd, /*!< in: file descriptor */ + ulint offset, /*!< in: offset where to read + in number of row_merge_block_t + elements */ + row_merge_block_t* buf); /*!< out: data */ +/********************************************************************//** +Read a merge record. 
+@return pointer to next record, or NULL on I/O error or end of list */ +UNIV_INTERN __attribute__((nonnull)) +const byte* +row_merge_read_rec( +/*===============*/ + row_merge_block_t* block, /*!< in/out: file buffer */ + mrec_buf_t* buf, /*!< in/out: secondary buffer */ + const byte* b, /*!< in: pointer to record */ + const dict_index_t* index, /*!< in: index of the record */ + int fd, /*!< in: file descriptor */ + ulint* foffs, /*!< in/out: file offset */ + const mrec_t** mrec, /*!< out: pointer to merge record, + or NULL on end of list + (non-NULL on I/O error) */ + ulint* offsets);/*!< out: offsets of mrec */ #endif /* row0merge.h */ diff --git a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h index e17fd584110..17a29e38ec7 100644 --- a/storage/innobase/include/row0mysql.h +++ b/storage/innobase/include/row0mysql.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2000, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 2000, 2010, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -116,7 +116,7 @@ row_mysql_pad_col( /**************************************************************//** Stores a non-SQL-NULL field given in the MySQL format in the InnoDB format. The counterpart of this function is row_sel_field_store_in_mysql_format() in -row0sel.c. +row0sel.cc. @return up to which byte we used buf in the conversion */ UNIV_INTERN byte* @@ -127,7 +127,10 @@ row_mysql_store_col_in_innobase_format( this function is called! */ byte* buf, /*!< in/out: buffer for a converted integer value; this must be at least - col_len long then! */ + col_len long then! NOTE that dfield + may also get a pointer to 'buf', + therefore do not discard this as long + as dfield is used! */ ibool row_format_col, /*!< TRUE if the mysql_data is from a MySQL row, FALSE if from a MySQL key value; @@ -190,15 +193,6 @@ row_update_prebuilt_trx( in MySQL handle */ trx_t* trx); /*!< in: transaction handle */ /*********************************************************************//** -Unlocks AUTO_INC type locks that were possibly reserved by a trx. This -function should be called at the the end of an SQL statement, by the -connection thread that owns the transaction (trx->mysql_thd). */ -UNIV_INTERN -void -row_unlock_table_autoinc_for_mysql( -/*===============================*/ - trx_t* trx); /*!< in/out: transaction */ -/*********************************************************************//** Sets an AUTO_INC type lock on the table mentioned in prebuilt. The AUTO_INC lock gives exclusive access to the auto-inc counter of the table. 
The lock is reserved only for the duration of an SQL statement. @@ -415,7 +409,7 @@ row_table_add_foreign_constraints( any foreign keys are found. */ /*********************************************************************//** -The master thread in srv0srv.c calls this regularly to drop tables which +The master thread in srv0srv.cc calls this regularly to drop tables which we must drop in background after queries to them have ended. Such lazy dropping of tables is needed in ALTER TABLE on Unix. @return how many tables dropped + remaining tables in list */ @@ -528,6 +522,20 @@ row_is_magic_monitor_table( const char* table_name); /*!< in: name of the table, in the form database/table_name */ +/*********************************************************************//** +Initialize this module */ +UNIV_INTERN +void +row_mysql_init(void); +/*================*/ + +/*********************************************************************//** +Close this module */ +UNIV_INTERN +void +row_mysql_close(void); +/*=================*/ + /* A struct describing a place for an individual column in the MySQL row format which is presented to the table handler in ha_innobase. This template struct is used to speed up row transformations between @@ -544,6 +552,10 @@ struct mysql_row_templ_struct { Innobase record in the clustered index; not defined if template_type is ROW_MYSQL_WHOLE_ROW */ + ulint icp_rec_field_no; /*!< field number of the column in an + Innobase record in the current index; + not defined unless + index condition pushdown is used */ ulint mysql_col_offset; /*!< offset of the column in the MySQL row format */ ulint mysql_col_len; /*!< length of the column in the MySQL @@ -686,6 +698,12 @@ struct row_prebuilt_struct { generated, the row id of the last row fetched is stored here */ + doc_id_t fts_doc_id; /* if the table has an FTS index on + it then we fetch the doc_id. 
+ FTS-FIXME: Currently we fetch it always + but in the future we must only fetch + it when FTS columns are being + updated */ dtuple_t* clust_ref; /*!< prebuilt dtuple used in sel/upd/del */ ulint select_lock_type;/*!< LOCK_NONE, LOCK_S, or LOCK_X */ @@ -762,6 +780,7 @@ struct row_prebuilt_struct { to this heap */ mem_heap_t* old_vers_heap; /*!< memory heap where a previous version is built in consistent read */ + fts_result_t* result; /* The result of an FTS query */ /*----------------------*/ ulonglong autoinc_last_value; /*!< last value of AUTO-INC interval */ @@ -778,6 +797,13 @@ struct row_prebuilt_struct { store it here so that we can return it to MySQL */ /*----------------------*/ + void* idx_cond; /*!< In ICP, pointer to a ha_innobase, + passed to innobase_index_cond(). + NULL if index condition pushdown is + not used. */ + ulint idx_cond_n_cols;/*!< Number of fields in idx_cond_cols. + 0 if and only if idx_cond == NULL. */ + /*----------------------*/ ulint magic_n2; /*!< this should be the same as magic_n */ }; diff --git a/storage/innobase/include/row0mysql.ic b/storage/innobase/include/row0mysql.ic index 35033aa2ad1..2eb60898c46 100644 --- a/storage/innobase/include/row0mysql.ic +++ b/storage/innobase/include/row0mysql.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2001, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2001, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/row0purge.h b/storage/innobase/include/row0purge.h index 485d51dbc83..740771fa3eb 100644 --- a/storage/innobase/include/row0purge.h +++ b/storage/innobase/include/row0purge.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -34,6 +34,8 @@ Created 3/14/1997 Heikki Tuuri #include "trx0types.h" #include "que0types.h" #include "row0types.h" +#include "row0purge.h" +#include "ut0vec.h" /********************************************************************//** Creates a purge node to a query graph. 
@@ -42,8 +44,9 @@ UNIV_INTERN purge_node_t* row_purge_node_create( /*==================*/ - que_thr_t* parent, /*!< in: parent node, i.e., a thr node */ - mem_heap_t* heap); /*!< in: memory heap where created */ + que_thr_t* parent, /*!< in: parent node, i.e., a + thr node */ + mem_heap_t* heap); /*!< in: memory heap where created */ /***********************************************************//** Determines if it is possible to remove a secondary index entry. Removal is possible if the secondary index entry does not refer to any @@ -83,20 +86,16 @@ struct purge_node_struct{ /*----------------------*/ /* Local storage for this graph node */ roll_ptr_t roll_ptr;/* roll pointer to undo log record */ - trx_undo_rec_t* undo_rec;/* undo log record */ - trx_undo_inf_t* reservation;/* reservation for the undo log record in - the purge array */ + ib_vector_t* undo_recs;/*!< Undo recs to purge */ + undo_no_t undo_no;/* undo number of the record */ + ulint rec_type;/* undo log record type: TRX_UNDO_INSERT_REC, ... 
*/ - btr_pcur_t pcur; /*!< persistent cursor used in searching the - clustered index record */ - ibool found_clust;/* TRUE if the clustered index record - determined by ref was found in the clustered - index, and we were able to position pcur on - it */ dict_table_t* table; /*!< table where purge is done */ + ulint cmpl_info;/* compiler analysis info of an update */ + upd_t* update; /*!< update vector for a clustered index record */ dtuple_t* ref; /*!< NULL, or row reference to the next row to @@ -109,6 +108,14 @@ struct purge_node_struct{ mem_heap_t* heap; /*!< memory heap used as auxiliary storage for row; this must be emptied after a successful purge of a row */ + ibool found_clust;/* TRUE if the clustered index record + determined by ref was found in the clustered + index, and we were able to position pcur on + it */ + btr_pcur_t pcur; /*!< persistent cursor used in searching the + clustered index record */ + ibool done; /* Debug flag */ + }; #ifndef UNIV_NONINL diff --git a/storage/innobase/include/row0purge.ic b/storage/innobase/include/row0purge.ic index 23d7d3845a4..700106d1048 100644 --- a/storage/innobase/include/row0purge.ic +++ b/storage/innobase/include/row0purge.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/row0row.h b/storage/innobase/include/row0row.h index c2849be7c3e..cf253ab2347 100644 --- a/storage/innobase/include/row0row.h +++ b/storage/innobase/include/row0row.h @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -294,10 +294,7 @@ row_search_index_entry( /* The allowed latching order of index records is the following: (1) a secondary index record -> (2) the clustered index record -> -(3) rollback segment data for the clustered index record. - -No new latches may be obtained while the kernel mutex is reserved. -However, the kernel mutex can be reserved while latches are owned. */ +(3) rollback segment data for the clustered index record. 
*/ /*******************************************************************//** Formats the raw data in "data" (in InnoDB on-disk format) using diff --git a/storage/innobase/include/row0row.ic b/storage/innobase/include/row0row.ic index 0b9ca982af8..8e9f3460519 100644 --- a/storage/innobase/include/row0row.ic +++ b/storage/innobase/include/row0row.ic @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/row0sel.h b/storage/innobase/include/row0sel.h index 1c4ea6f7244..fa3c93b6b9a 100644 --- a/storage/innobase/include/row0sel.h +++ b/storage/innobase/include/row0sel.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2010, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -179,6 +179,15 @@ row_search_check_if_query_cache_permitted( trx_t* trx, /*!< in: transaction object */ const char* norm_name); /*!< in: concatenation of database name, '/' char, table name */ +void +row_create_key( +/*===========*/ + dtuple_t* tuple, /* in: tuple where to build; + NOTE: we assume that the type info + in the tuple is already according + to index! */ + dict_index_t* index, /* in: index of the key value */ + doc_id_t* doc_id); /* in: doc id to lookup.*/ /*******************************************************************//** Read the max AUTOINC value from an index. @return DB_SUCCESS if all OK else error code */ diff --git a/storage/innobase/include/row0sel.ic b/storage/innobase/include/row0sel.ic index 5907f9913da..d83a3448832 100644 --- a/storage/innobase/include/row0sel.ic +++ b/storage/innobase/include/row0sel.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -92,7 +92,7 @@ open_step( } } - if (UNIV_EXPECT(err, DB_SUCCESS) != DB_SUCCESS) { + if (err != DB_SUCCESS) { /* SQL error detected */ fprintf(stderr, "SQL error %lu\n", (ulong) err); diff --git a/storage/innobase/include/row0types.h b/storage/innobase/include/row0types.h index 7d6a7c8e2b1..463651b43b8 100644 --- a/storage/innobase/include/row0types.h +++ b/storage/innobase/include/row0types.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2010, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/row0uins.h b/storage/innobase/include/row0uins.h index 77b071c3a6b..5f3a7212ee1 100644 --- a/storage/innobase/include/row0uins.h +++ b/storage/innobase/include/row0uins.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/row0uins.ic b/storage/innobase/include/row0uins.ic index 27606150d8e..54da2e49874 100644 --- a/storage/innobase/include/row0uins.ic +++ b/storage/innobase/include/row0uins.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. 
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/row0umod.h b/storage/innobase/include/row0umod.h index ed44cc8d601..84831e59d90 100644 --- a/storage/innobase/include/row0umod.h +++ b/storage/innobase/include/row0umod.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/row0umod.ic b/storage/innobase/include/row0umod.ic index ea3fd3b43c7..00a8cd86e01 100644 --- a/storage/innobase/include/row0umod.ic +++ b/storage/innobase/include/row0umod.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/row0undo.h b/storage/innobase/include/row0undo.h index 6eb4ca448b3..90a15172ae0 100644 --- a/storage/innobase/include/row0undo.h +++ b/storage/innobase/include/row0undo.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. 
+Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/row0undo.ic b/storage/innobase/include/row0undo.ic index dc788debc14..b97ffca590e 100644 --- a/storage/innobase/include/row0undo.ic +++ b/storage/innobase/include/row0undo.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/row0upd.h b/storage/innobase/include/row0upd.h index c275c1da78e..a7687bb1ded 100644 --- a/storage/innobase/include/row0upd.h +++ b/storage/innobase/include/row0upd.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -304,6 +304,37 @@ row_upd_changes_ord_field_binary_func( row_upd_changes_ord_field_binary_func(index,update,row,ext) #endif /* UNIV_DEBUG */ /***********************************************************//** +Checks if an FTS indexed column is affected by an UPDATE. 
+@return offset within fts_t::indexes if FTS indexed column updated else +ULINT_UNDEFINED */ +UNIV_INTERN +ulint +row_upd_changes_fts_column( +/*=======================*/ + dict_table_t* table, /*!< in: table */ + upd_field_t* upd_field); /*!< in: field to check */ +/***********************************************************//** +Checks if an FTS Doc ID column is affected by an UPDATE. +@return TRUE if Doc ID column is affected */ +UNIV_INTERN +ulint +row_upd_changes_doc_id( +/*===================*/ + dict_table_t* table, /*!< in: table */ + upd_field_t* upd_field); /*!< in: field to check */ +/***********************************************************//** +Checks if an update vector changes the table's FTS-indexed columns. +NOTE: must not be called for tables which do not have an FTS-index. +Also, the vector returned must be explicitly freed as it's allocated +using the ut_malloc() allocator. +@return vector of FTS indexes that were affected by the update else NULL */ +UNIV_INTERN +ib_vector_t* +row_upd_changes_fts_columns( +/*========================*/ + dict_table_t* table, /*!< in: table */ + upd_t* update); /*!< in: update vector for the row */ +/***********************************************************//** Checks if an update vector changes an ordering field of an index record. This function is fast if the update vector is short or the number of ordering fields in the index is small. Otherwise, this can be quadratic. 
@@ -369,7 +400,7 @@ row_upd_index_parse( struct upd_field_struct{ unsigned field_no:16; /*!< field number in an index, usually the clustered index, but in updating - a secondary index record in btr0cur.c + a secondary index record in btr0cur.cc this is the position in the secondary index */ #ifndef UNIV_HOTBACKUP diff --git a/storage/innobase/include/row0upd.ic b/storage/innobase/include/row0upd.ic index 10646241125..8a2543eaac9 100644 --- a/storage/innobase/include/row0upd.ic +++ b/storage/innobase/include/row0upd.ic @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -43,12 +43,12 @@ upd_create( { upd_t* update; - update = (upd_t*) mem_heap_alloc(heap, sizeof(upd_t)); + update = (upd_t*) mem_heap_zalloc(heap, sizeof(upd_t)); update->info_bits = 0; update->n_fields = n; update->fields = (upd_field_t*) - mem_heap_alloc(heap, sizeof(upd_field_t) * n); + mem_heap_zalloc(heap, sizeof(upd_field_t) * n); return(update); } @@ -102,7 +102,7 @@ upd_field_set_field_no( upd_field->field_no = field_no; upd_field->orig_len = 0; - if (UNIV_UNLIKELY(field_no >= dict_index_get_n_fields(index))) { + if (field_no >= dict_index_get_n_fields(index)) { fprintf(stderr, "InnoDB: Error: trying to access field %lu in ", (ulong) field_no); @@ -157,7 +157,7 @@ row_upd_rec_sys_fields( ut_ad(dict_index_is_clust(index)); ut_ad(rec_offs_validate(rec, index, offsets)); - if (UNIV_LIKELY_NULL(page_zip)) { + if (page_zip) { ulint pos = dict_index_get_sys_col_pos(index, 
DATA_TRX_ID); page_zip_write_trx_id_and_roll_ptr(page_zip, rec, offsets, pos, trx->id, roll_ptr); diff --git a/storage/innobase/include/row0vers.h b/storage/innobase/include/row0vers.h index 5a2e38230d5..d9e3471b3dc 100644 --- a/storage/innobase/include/row0vers.h +++ b/storage/innobase/include/row0vers.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -37,13 +37,15 @@ Created 2/6/1997 Heikki Tuuri /*****************************************************************//** Finds out if an active transaction has inserted or modified a secondary -index record. NOTE: the kernel mutex is temporarily released in this -function! -@return NULL if committed, else the active transaction */ +index record. +@return 0 if committed, else the active transaction id; +NOTE that this function can return false positives but never false +negatives. The caller must confirm all positive results by calling +trx_is_active() while holding lock_sys->mutex. 
*/ UNIV_INTERN -trx_t* -row_vers_impl_x_locked_off_kernel( -/*==============================*/ +trx_id_t +row_vers_impl_x_locked( +/*===================*/ const rec_t* rec, /*!< in: record in a secondary index */ dict_index_t* index, /*!< in: the secondary index */ const ulint* offsets);/*!< in: rec_get_offsets(rec, index) */ diff --git a/storage/innobase/include/row0vers.ic b/storage/innobase/include/row0vers.ic index 8bb3a5c0cb3..ef43a55bf70 100644 --- a/storage/innobase/include/row0vers.ic +++ b/storage/innobase/include/row0vers.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1997, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/srv0conc.h b/storage/innobase/include/srv0conc.h new file mode 100644 index 00000000000..9aee1b17bf0 --- /dev/null +++ b/storage/innobase/include/srv0conc.h @@ -0,0 +1,111 @@ +/***************************************************************************** + +Copyright (c) 2011, Oracle and/or its affiliates. All Rights Reserved. + +Portions of this file contain modifications contributed and copyrighted by +Google, Inc. 
Those modifications are gratefully acknowledged and are described +briefly in the InnoDB documentation. The contributions by Google are +incorporated with their permission, and subject to the conditions contained in +the file COPYING.Google. + +Portions of this file contain modifications contributed and copyrighted +by Percona Inc.. Those modifications are +gratefully acknowledged and are described briefly in the InnoDB +documentation. The contributions by Percona Inc. are incorporated with +their permission, and subject to the conditions contained in the file +COPYING.Percona. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file srv/srv0conc.h + +InnoDB concurrency manager header file + +Created 2011/04/18 Sunny Bains +*******************************************************/ + +#ifndef srv_conc_h +#define srv_conc_h + +/** We are prepared for a situation that we have this many threads waiting for +a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the +value. */ + +extern ulint srv_max_n_threads; + +/** The following controls how many threads we let inside InnoDB concurrently: +threads waiting for locks are not counted into the number because otherwise +we could get a deadlock. 
Value of 0 will disable the concurrency check. */ + +extern ulong srv_thread_concurrency; + +/*********************************************************************//** +Initialise the concurrency management data structures */ +void +srv_conc_init(void); +/*===============*/ + +/*********************************************************************//** +Free the concurrency management data structures */ +void +srv_conc_free(void); +/*===============*/ + +/*********************************************************************//** +Puts an OS thread to wait if there are too many concurrent threads +(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */ +UNIV_INTERN +void +srv_conc_enter_innodb( +/*==================*/ + trx_t* trx); /*!< in: transaction object associated + with the thread */ + +/*********************************************************************//** +This lets a thread enter InnoDB regardless of the number of threads inside +InnoDB. This must be called when a thread ends a lock wait. */ +UNIV_INTERN +void +srv_conc_force_enter_innodb( +/*========================*/ + trx_t* trx); /*!< in: transaction object associated with + the thread */ + +/*********************************************************************//** +This must be called when a thread exits InnoDB in a lock wait or at the +end of an SQL statement. */ +UNIV_INTERN +void +srv_conc_force_exit_innodb( +/*=======================*/ + trx_t* trx); /*!< in: transaction object associated with + the thread */ + +/*********************************************************************//** +Get the count of threads waiting inside InnoDB. */ +UNIV_INTERN +ulint +srv_conc_get_waiting_threads(void); +/*==============================*/ + +/*********************************************************************//** +Get the count of threads active inside InnoDB. 
*/ +UNIV_INTERN +ulint +srv_conc_get_active_threads(void); +/*==============================*/ + +#endif /* srv_conc_h */ diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h new file mode 100644 index 00000000000..5e47f82f416 --- /dev/null +++ b/storage/innobase/include/srv0mon.h @@ -0,0 +1,817 @@ +/*********************************************************************** + +Copyright (c) 2010, 2011, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +***********************************************************************/ + +/**************************************************//** +@file include/srv0mon.h +Server monitor counter related defines + +Created 12/15/2009 Jimmy Yang +*******************************************************/ + +#ifndef srv0mon_h +#define srv0mon_h + +#include "univ.i" +#ifndef UNIV_HOTBACKUP + + +/** Possible status values for "mon_status" in "struct monitor_value" */ +enum monitor_running_status { + MONITOR_STARTED = 1, /*!< Monitor has been turned on */ + MONITOR_STOPPED = 2 /*!< Monitor has been turned off */ +}; + +typedef enum monitor_running_status monitor_running_t; + +/** Monitor counter value type */ +typedef ib_int64_t mon_type_t; + +/** Two monitor structures are defined in this file. 
One is +"monitor_value_t" which contains dynamic counter values for each +counter. The other is "monitor_info_t", which contains +static information (counter name, desc etc.) for each counter. +In addition, an enum datatype "monitor_id_t" is also defined, +it identifies each monitor with an internally used symbol, whose +integer value indexes into the above two structures for its dynamic +and static information. +Developers who intend to add new counters need to +fill in counter information as described in "monitor_info_t" and +create the internal counter ID in "monitor_id_t". */ + +/** Structure containing the actual values of a monitor counter. */ +struct monitor_value_struct { + ib_time_t mon_start_time; /*!< Start time of monitoring */ + ib_time_t mon_stop_time; /*!< Stop time of monitoring */ + ib_time_t mon_reset_time; /*!< Time the counter was reset */ + mon_type_t mon_value; /*!< Current counter Value */ + mon_type_t mon_max_value; /*!< Current Max value */ + mon_type_t mon_min_value; /*!< Current Min value */ + mon_type_t mon_value_reset;/*!< value at last reset */ + mon_type_t mon_max_value_start; /*!< Max value since start */ + mon_type_t mon_min_value_start; /*!< Min value since start */ + mon_type_t mon_start_value;/*!< Value at the start time */ + mon_type_t mon_last_value; /*!< Last set of values */ + monitor_running_t mon_status; /* whether monitor still running */ +}; + +typedef struct monitor_value_struct monitor_value_t; + +/** The following defines are the possible values for the "monitor_type" field in +"struct monitor_info" */ +enum monitor_type_value { + MONITOR_NONE = 0, /*!< No monitoring */ + MONITOR_MODULE = 1, /*!< This is a monitor module type, + not a counter */ + MONITOR_EXISTING = 2, /*!< The monitor carries information from + an existing system status variable */ + MONITOR_NO_AVERAGE = 4, /*!< Set this status if we don't want to + calculate the average value for the counter */ + MONITOR_DISPLAY_CURRENT = 8, /*!< Display current value of the + 
counter, rather than incremental value + over the period. Mostly for counters + displaying current resource usage */ + MONITOR_GROUP_MODULE = 16, /*!< Monitor can be turned on/off + only as a module, but not individually */ + MONITOR_DEFAULT_ON = 32,/*!< Monitor will be turned on by default at + server start up */ + MONITOR_SET_OWNER = 64, /*!< Owner of "monitor set", a set of + monitor counters */ + MONITOR_SET_MEMBER = 128,/*!< Being part of a "monitor set" */ + MONITOR_HIDDEN = 256 /*!< Do not display this monitor in the + metrics table */ +}; + +typedef enum monitor_type_value monitor_type_t; + +/** Counter minimum value is initialized to be max value of + mon_type_t (ib_int64_t) */ +#define MIN_RESERVED ((mon_type_t) (IB_ULONGLONG_MAX >> 1)) +#define MAX_RESERVED (~MIN_RESERVED) + +/** This enumeration defines internal monitor identifier used internally +to identify each particular counter. Its value indexes into two arrays, +one is the "innodb_counter_value" array which records actual monitor +counter values, the other is "innodb_counter_info" array which describes +each counter's basic information (name, desc etc.). 
A couple of +naming rules here: +1) If the monitor defines a module, it starts with MONITOR_MODULE +2) If the monitor uses existing counters from "status variable", its ID +name shall start with MONITOR_OVLD + +Please refer to "innodb_counter_info" in srv/srv0mon.cc for detailed +information on each monitor counter */ + +enum monitor_id_value { + /* This is to identify the default value set by the metrics + control global variables */ + MONITOR_DEFAULT_START = 0, + + /* Start of Metadata counter */ + MONITOR_MODULE_METADATA, + MONITOR_TABLE_OPEN, + MONITOR_TABLE_CLOSE, + MONITOR_TABLE_REFERENCE, + MONITOR_OVLD_META_MEM_POOL, + + /* Lock manager related counters */ + MONITOR_MODULE_LOCK, + MONITOR_DEADLOCK, + MONITOR_TIMEOUT, + MONITOR_LOCKREC_WAIT, + MONITOR_TABLELOCK_WAIT, + MONITOR_NUM_RECLOCK_REQ, + MONITOR_RECLOCK_CREATED, + MONITOR_RECLOCK_REMOVED, + MONITOR_NUM_RECLOCK, + MONITOR_TABLELOCK_CREATED, + MONITOR_TABLELOCK_REMOVED, + MONITOR_NUM_TABLELOCK, + MONITOR_OVLD_ROW_LOCK_CURRENT_WAIT, + MONITOR_OVLD_LOCK_WAIT_TIME, + MONITOR_OVLD_LOCK_MAX_WAIT_TIME, + MONITOR_OVLD_ROW_LOCK_WAIT, + MONITOR_OVLD_LOCK_AVG_WAIT_TIME, + + /* Buffer and I/O related counters.
*/ + MONITOR_MODULE_BUFFER, + MONITOR_OVLD_BUFFER_POOL_SIZE, + MONITOR_OVLD_BUF_POOL_READS, + MONITOR_OVLD_BUF_POOL_READ_REQUESTS, + MONITOR_OVLD_BUF_POOL_WRITE_REQUEST, + MONITOR_PAGE_INFLUSH, + MONITOR_OVLD_BUF_POOL_WAIT_FREE, + MONITOR_OVLD_BUF_POOL_READ_AHEAD, + MONITOR_OVLD_BUF_POOL_READ_AHEAD_EVICTED, + MONITOR_OVLD_BUF_POOL_PAGE_TOTAL, + MONITOR_OVLD_BUF_POOL_PAGE_MISC, + MONITOR_OVLD_BUF_POOL_PAGES_DATA, + MONITOR_OVLD_BUF_POOL_PAGES_DIRTY, + MONITOR_OVLD_BUF_POOL_PAGES_FREE, + MONITOR_OVLD_PAGE_CREATED, + MONITOR_OVLD_PAGES_WRITTEN, + MONITOR_OVLD_PAGES_READ, + MONITOR_OVLD_BYTE_READ, + MONITOR_OVLD_BYTE_WRITTEN, + MONITOR_FLUSH_BATCH_SCANNED, + MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL, + MONITOR_FLUSH_BATCH_SCANNED_PER_CALL, + MONITOR_FLUSH_BATCH_TOTAL_PAGE, + MONITOR_FLUSH_BATCH_COUNT, + MONITOR_FLUSH_BATCH_PAGES, + MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE, + MONITOR_FLUSH_NEIGHBOR_COUNT, + MONITOR_FLUSH_NEIGHBOR_PAGES, + MONITOR_FLUSH_MAX_DIRTY_TOTAL_PAGE, + MONITOR_FLUSH_MAX_DIRTY_COUNT, + MONITOR_FLUSH_MAX_DIRTY_PAGES, + MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE, + MONITOR_FLUSH_ADAPTIVE_COUNT, + MONITOR_FLUSH_ADAPTIVE_PAGES, + MONITOR_FLUSH_ASYNC_TOTAL_PAGE, + MONITOR_FLUSH_ASYNC_COUNT, + MONITOR_FLUSH_ASYNC_PAGES, + MONITOR_FLUSH_SYNC_TOTAL_PAGE, + MONITOR_FLUSH_SYNC_COUNT, + MONITOR_FLUSH_SYNC_PAGES, + MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE, + MONITOR_FLUSH_BACKGROUND_COUNT, + MONITOR_FLUSH_BACKGROUND_PAGES, + MONITOR_LRU_BATCH_SCANNED, + MONITOR_LRU_BATCH_SCANNED_NUM_CALL, + MONITOR_LRU_BATCH_SCANNED_PER_CALL, + MONITOR_LRU_BATCH_TOTAL_PAGE, + MONITOR_LRU_BATCH_COUNT, + MONITOR_LRU_BATCH_PAGES, + MONITOR_LRU_SINGLE_FLUSH_SCANNED, + MONITOR_LRU_SINGLE_FLUSH_SCANNED_NUM_CALL, + MONITOR_LRU_SINGLE_FLUSH_SCANNED_PER_CALL, + MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT, + MONITOR_LRU_GET_FREE_SEARCH, + MONITOR_LRU_SEARCH_SCANNED, + MONITOR_LRU_SEARCH_SCANNED_NUM_CALL, + MONITOR_LRU_SEARCH_SCANNED_PER_CALL, + MONITOR_LRU_UNZIP_SEARCH_SCANNED, + 
MONITOR_LRU_UNZIP_SEARCH_SCANNED_NUM_CALL, + MONITOR_LRU_UNZIP_SEARCH_SCANNED_PER_CALL, + + /* Buffer Page I/O specific counters. */ + MONITOR_MODULE_BUF_PAGE, + MONITOR_INDEX_LEAF_PAGE_READ, + MONITOR_INDEX_NON_LEAF_PAGE_READ, + MONITOR_INDEX_IBUF_LEAF_PAGE_READ, + MONITOR_INDEX_IBUF_NON_LEAF_PAGE_READ, + MONITOR_UNDO_LOG_PAGE_READ, + MONITOR_INODE_PAGE_READ, + MONITOR_IBUF_FREELIST_PAGE_READ, + MONITOR_IBUF_BITMAP_PAGE_READ, + MONITOR_SYSTEM_PAGE_READ, + MONITOR_TRX_SYSTEM_PAGE_READ, + MONITOR_FSP_HDR_PAGE_READ, + MONITOR_XDES_PAGE_READ, + MONITOR_BLOB_PAGE_READ, + MONITOR_ZBLOB_PAGE_READ, + MONITOR_ZBLOB2_PAGE_READ, + MONITOR_OTHER_PAGE_READ, + MONITOR_INDEX_LEAF_PAGE_WRITTEN, + MONITOR_INDEX_NON_LEAF_PAGE_WRITTEN, + MONITOR_INDEX_IBUF_LEAF_PAGE_WRITTEN, + MONITOR_INDEX_IBUF_NON_LEAF_PAGE_WRITTEN, + MONITOR_UNDO_LOG_PAGE_WRITTEN, + MONITOR_INODE_PAGE_WRITTEN, + MONITOR_IBUF_FREELIST_PAGE_WRITTEN, + MONITOR_IBUF_BITMAP_PAGE_WRITTEN, + MONITOR_SYSTEM_PAGE_WRITTEN, + MONITOR_TRX_SYSTEM_PAGE_WRITTEN, + MONITOR_FSP_HDR_PAGE_WRITTEN, + MONITOR_XDES_PAGE_WRITTEN, + MONITOR_BLOB_PAGE_WRITTEN, + MONITOR_ZBLOB_PAGE_WRITTEN, + MONITOR_ZBLOB2_PAGE_WRITTEN, + MONITOR_OTHER_PAGE_WRITTEN, + + /* OS level counters (I/O) */ + MONITOR_MODULE_OS, + MONITOR_OVLD_OS_FILE_READ, + MONITOR_OVLD_OS_FILE_WRITE, + MONITOR_OVLD_OS_FSYNC, + MONITOR_OS_PENDING_READS, + MONITOR_OS_PENDING_WRITES, + MONITOR_OVLD_OS_LOG_WRITTEN, + MONITOR_OVLD_OS_LOG_FSYNC, + MONITOR_OVLD_OS_LOG_PENDING_FSYNC, + MONITOR_OVLD_OS_LOG_PENDING_WRITES, + + /* Transaction related counters */ + MONITOR_MODULE_TRX, + MONITOR_TRX_RW_COMMIT, + MONITOR_TRX_RO_COMMIT, + MONITOR_TRX_NL_RO_COMMIT, + MONITOR_TRX_COMMIT_UNDO, + MONITOR_TRX_ROLLBACK, + MONITOR_TRX_ROLLBACK_SAVEPOINT, + MONITOR_TRX_ROLLBACK_ACTIVE, + MONITOR_TRX_ACTIVE, + MONITOR_RSEG_HISTORY_LEN, + MONITOR_NUM_UNDO_SLOT_USED, + MONITOR_NUM_UNDO_SLOT_CACHED, + MONITOR_RSEG_CUR_SIZE, + + /* Purge related counters */ + MONITOR_MODULE_PURGE, + 
MONITOR_N_DEL_ROW_PURGE, + MONITOR_N_UPD_EXIST_EXTERN, + MONITOR_PURGE_INVOKED, + MONITOR_PURGE_N_PAGE_HANDLED, + MONITOR_DML_PURGE_DELAY, + MONITOR_PURGE_STOP_COUNT, + MONITOR_PURGE_RESUME_COUNT, + + /* Recovery related counters */ + MONITOR_MODULE_RECOVERY, + MONITOR_NUM_CHECKPOINT, + MONITOR_OVLD_LSN_FLUSHDISK, + MONITOR_OVLD_LSN_CHECKPOINT, + MONITOR_OVLD_LSN_CURRENT, + MONITOR_LSN_CHECKPOINT_AGE, + MONITOR_OVLD_BUF_OLDEST_LSN, + MONITOR_OVLD_MAX_AGE_ASYNC, + MONITOR_OVLD_MAX_AGE_SYNC, + MONITOR_PENDING_LOG_WRITE, + MONITOR_PENDING_CHECKPOINT_WRITE, + MONITOR_LOG_IO, + MONITOR_OVLD_LOG_WAITS, + MONITOR_OVLD_LOG_WRITE_REQUEST, + MONITOR_OVLD_LOG_WRITES, + + /* Page Manager related counters */ + MONITOR_MODULE_PAGE, + MONITOR_PAGE_COMPRESS, + MONITOR_PAGE_DECOMPRESS, + + /* Index related counters */ + MONITOR_MODULE_INDEX, + MONITOR_INDEX_SPLIT, + MONITOR_INDEX_MERGE, + + /* Adaptive Hash Index related counters */ + MONITOR_MODULE_ADAPTIVE_HASH, + MONITOR_OVLD_ADAPTIVE_HASH_SEARCH, + MONITOR_OVLD_ADAPTIVE_HASH_SEARCH_BTREE, + MONITOR_ADAPTIVE_HASH_PAGE_ADDED, + MONITOR_ADAPTIVE_HASH_PAGE_REMOVED, + MONITOR_ADAPTIVE_HASH_ROW_ADDED, + MONITOR_ADAPTIVE_HASH_ROW_REMOVED, + MONITOR_ADAPTIVE_HASH_ROW_REMOVE_NOT_FOUND, + MONITOR_ADAPTIVE_HASH_ROW_UPDATED, + + /* Tablespace related counters */ + MONITOR_MODULE_FIL_SYSTEM, + MONITOR_OVLD_N_FILE_OPENED, + + /* InnoDB Change Buffer related counters */ + MONITOR_MODULE_IBUF_SYSTEM, + MONITOR_OVLD_IBUF_MERGE_INSERT, + MONITOR_OVLD_IBUF_MERGE_DELETE, + MONITOR_OVLD_IBUF_MERGE_PURGE, + MONITOR_OVLD_IBUF_MERGE_DISCARD_INSERT, + MONITOR_OVLD_IBUF_MERGE_DISCARD_DELETE, + MONITOR_OVLD_IBUF_MERGE_DISCARD_PURGE, + MONITOR_OVLD_IBUF_MERGES, + MONITOR_OVLD_IBUF_SIZE, + + /* Counters for server operations */ + MONITOR_MODULE_SERVER, + MONITOR_MASTER_THREAD_SLEEP, + MONITOR_OVLD_SERVER_ACTIVITY, + MONITOR_MASTER_ACTIVE_LOOPS, + MONITOR_MASTER_IDLE_LOOPS, + MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND, + 
MONITOR_SRV_IBUF_MERGE_MICROSECOND, + MONITOR_SRV_LOG_FLUSH_MICROSECOND, + MONITOR_SRV_MEM_VALIDATE_MICROSECOND, + MONITOR_SRV_PURGE_MICROSECOND, + MONITOR_SRV_DICT_LRU_MICROSECOND, + MONITOR_SRV_CHECKPOINT_MICROSECOND, + MONITOR_OVLD_SRV_DBLWR_WRITES, + MONITOR_OVLD_SRV_DBLWR_PAGES_WRITTEN, + MONITOR_OVLD_SRV_PAGE_SIZE, + MONITOR_OVLD_RWLOCK_S_SPIN_WAITS, + MONITOR_OVLD_RWLOCK_X_SPIN_WAITS, + MONITOR_OVLD_RWLOCK_S_SPIN_ROUNDS, + MONITOR_OVLD_RWLOCK_X_SPIN_ROUNDS, + MONITOR_OVLD_RWLOCK_S_OS_WAITS, + MONITOR_OVLD_RWLOCK_X_OS_WAITS, + + /* Data DML related counters */ + MONITOR_MODULE_DML_STATS, + MONITOR_OLVD_ROW_READ, + MONITOR_OLVD_ROW_INSERTED, + MONITOR_OLVD_ROW_DELETED, + MONITOR_OLVD_ROW_UPDTATED, + + /* Data DDL related counters */ + MONITOR_MODULE_DDL_STATS, + MONITOR_BACKGROUND_DROP_TABLE, + + MONITOR_MODULE_ICP, + MONITOR_ICP_ATTEMPTS, + MONITOR_ICP_NO_MATCH, + MONITOR_ICP_OUT_OF_RANGE, + MONITOR_ICP_MATCH, + + /* This is used only for control system to turn + on/off and reset all monitor counters */ + MONITOR_ALL_COUNTER, + + /* This must be the last member */ + NUM_MONITOR +}; + +typedef enum monitor_id_value monitor_id_t; + +/** This informs the monitor control system to turn +on/off and reset monitor counters through wild card match */ +#define MONITOR_WILDCARD_MATCH (NUM_MONITOR + 1) + +/** Cannot find monitor counter with a specified name */ +#define MONITOR_NO_MATCH (NUM_MONITOR + 2) + +/** struct monitor_info describes the basic/static information +about each monitor counter. */ +struct monitor_info_struct { + const char* monitor_name; /*!< Monitor name */ + const char* monitor_module; /*!< Sub Module the monitor + belongs to */ + const char* monitor_desc; /*!< Brief desc of monitor counter */ + monitor_type_t monitor_type; /*!< Type of Monitor Info */ + monitor_id_t monitor_related_id;/*!< Monitor ID of counter that + related to this monitor. 
This is + set when the monitor belongs to + a "monitor set" */ + monitor_id_t monitor_id; /*!< Monitor ID as defined in enum + monitor_id_t */ +}; + +typedef struct monitor_info_struct monitor_info_t; + +/** Following are the "set_option" values allowed for +srv_mon_set_module_control() and srv_mon_process_existing_counter() +functions, which turn on/off/reset the monitor counters. */ +enum mon_set_option { + MONITOR_TURN_ON = 1, /*!< Turn on the counter */ + MONITOR_TURN_OFF, /*!< Turn off the counter */ + MONITOR_RESET_VALUE, /*!< Reset current values */ + MONITOR_RESET_ALL_VALUE, /*!< Reset all values */ + MONITOR_GET_VALUE /*!< Option for + srv_mon_process_existing_counter() + function */ +}; + +typedef enum mon_set_option mon_option_t; + +/** Number of bits in a ulint datatype */ +#define NUM_BITS_ULINT (sizeof(ulint) * CHAR_BIT) + +/** This "monitor_set_tbl" is a bitmap that records whether a particular monitor +counter has been turned on or off */ +extern ulint monitor_set_tbl[(NUM_MONITOR + NUM_BITS_ULINT - 1) / + NUM_BITS_ULINT]; + +/** Macros to turn on/off the control bit in monitor_set_tbl for a monitor +counter option. */ +#define MONITOR_ON(monitor) \ + (monitor_set_tbl[monitor / NUM_BITS_ULINT] |= \ + ((ulint)1 << (monitor % NUM_BITS_ULINT))) + +#define MONITOR_OFF(monitor) \ + (monitor_set_tbl[monitor / NUM_BITS_ULINT] &= \ + ~((ulint)1 << (monitor % NUM_BITS_ULINT))) + +/** Check whether the requested monitor is turned on/off */ +#define MONITOR_IS_ON(monitor) \ + (monitor_set_tbl[monitor / NUM_BITS_ULINT] & \ + ((ulint)1 << (monitor % NUM_BITS_ULINT))) + +/** The actual monitor counter array that records each monitor counter +value */ +extern monitor_value_t innodb_counter_value[NUM_MONITOR]; + +/** Following are macro defines for basic monitor counter manipulations. +Please note we do not provide any synchronization for these monitor +operations due to performance consideration.
Most counters can +be placed under existing mutex protections in respective code +module. */ + +/** Macros to access various fields of a monitor counters */ +#define MONITOR_FIELD(monitor, field) \ + (innodb_counter_value[monitor].field) + +#define MONITOR_VALUE(monitor) \ + MONITOR_FIELD(monitor, mon_value) + +#define MONITOR_MAX_VALUE(monitor) \ + MONITOR_FIELD(monitor, mon_max_value) + +#define MONITOR_MIN_VALUE(monitor) \ + MONITOR_FIELD(monitor, mon_min_value) + +#define MONITOR_VALUE_RESET(monitor) \ + MONITOR_FIELD(monitor, mon_value_reset) + +#define MONITOR_MAX_VALUE_START(monitor) \ + MONITOR_FIELD(monitor, mon_max_value_start) + +#define MONITOR_MIN_VALUE_START(monitor) \ + MONITOR_FIELD(monitor, mon_min_value_start) + +#define MONITOR_LAST_VALUE(monitor) \ + MONITOR_FIELD(monitor, mon_last_value) + +#define MONITOR_START_VALUE(monitor) \ + MONITOR_FIELD(monitor, mon_start_value) + +#define MONITOR_VALUE_SINCE_START(monitor) \ + (MONITOR_VALUE(monitor) + MONITOR_VALUE_RESET(monitor)) + +#define MONITOR_STATUS(monitor) \ + MONITOR_FIELD(monitor, mon_status) + +#define MONITOR_SET_START(monitor) \ + do { \ + MONITOR_STATUS(monitor) = MONITOR_STARTED; \ + MONITOR_FIELD((monitor), mon_start_time) = time(NULL); \ + } while (0) + +#define MONITOR_SET_OFF(monitor) \ + do { \ + MONITOR_STATUS(monitor) = MONITOR_STOPPED; \ + MONITOR_FIELD((monitor), mon_stop_time) = time(NULL); \ + } while (0) + +#define MONITOR_INIT_ZERO_VALUE 0 + +/** Max and min values are initialized when we first turn on the monitor +counter, and set the MONITOR_STATUS. 
*/ +#define MONITOR_MAX_MIN_NOT_INIT(monitor) \ + (MONITOR_STATUS(monitor) == MONITOR_INIT_ZERO_VALUE \ + && MONITOR_MIN_VALUE(monitor) == MONITOR_INIT_ZERO_VALUE \ + && MONITOR_MAX_VALUE(monitor) == MONITOR_INIT_ZERO_VALUE) + +#define MONITOR_INIT(monitor) \ + if (MONITOR_MAX_MIN_NOT_INIT(monitor)) { \ + MONITOR_MIN_VALUE(monitor) = MIN_RESERVED; \ + MONITOR_MIN_VALUE_START(monitor) = MIN_RESERVED; \ + MONITOR_MAX_VALUE(monitor) = MAX_RESERVED; \ + MONITOR_MAX_VALUE_START(monitor) = MAX_RESERVED; \ + } + +/** Macros to increment/decrement the counters. The normal +monitor counter operation expects appropriate synchronization +already exists. No additional mutex is necessary when operating +on the counters */ +#define MONITOR_INC(monitor) \ + if (MONITOR_IS_ON(monitor)) { \ + MONITOR_VALUE(monitor)++; \ + if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \ + MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor);\ + } \ + } + +#ifdef HAVE_ATOMIC_BUILTINS + +# define MONITOR_ATOMIC_INC(monitor) \ + if (MONITOR_IS_ON(monitor)) { \ + ib_uint64_t value; \ + value = os_atomic_increment_uint64( \ + (ib_uint64_t*) &MONITOR_VALUE(monitor), 1); \ + /* Note: This is not 100% accurate because of the \ + inherent race, we ignore it due to performance. */ \ + if (value > (ib_uint64_t) MONITOR_MAX_VALUE(monitor)) { \ + MONITOR_MAX_VALUE(monitor) = value; \ + } \ + } + +# define MONITOR_ATOMIC_DEC(monitor) \ + if (MONITOR_IS_ON(monitor)) { \ + ib_uint64_t value; \ + value = os_atomic_decrement_ulint( \ + (ib_uint64_t*) &MONITOR_VALUE(monitor), 1); \ + /* Note: This is not 100% accurate because of the \ + inherent race, we ignore it due to performance. 
*/ \ + if (value < (ib_uint64_t) MONITOR_MIN_VALUE(monitor)) { \ + MONITOR_MIN_VALUE(monitor) = value; \ + } \ + } +#endif /* HAVE_ATOMIC_BUILTINS */ + +/** Decrement the counter by one if the monitor is turned on, lowering the +recorded minimum when the new value falls below it */ +#define MONITOR_DEC(monitor) \ + if (MONITOR_IS_ON(monitor)) { \ + MONITOR_VALUE(monitor)--; \ + if (MONITOR_VALUE(monitor) < MONITOR_MIN_VALUE(monitor)) { \ + MONITOR_MIN_VALUE(monitor) = MONITOR_VALUE(monitor);\ + } \ + } + +/** Add "value" to the counter if the monitor is turned on, raising the +recorded maximum when the new value exceeds it */ +#define MONITOR_INC_VALUE(monitor, value) \ + if (MONITOR_IS_ON(monitor)) { \ + MONITOR_VALUE(monitor) += (mon_type_t) (value); \ + if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \ + MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor);\ + } \ + } + +/** Subtract "value" from the counter if the monitor is turned on, lowering +the recorded minimum when the new value falls below it. ut_ad() checks that +the counter is at least "value" before the subtraction. */ +#define MONITOR_DEC_VALUE(monitor, value) \ + if (MONITOR_IS_ON(monitor)) { \ + ut_ad(MONITOR_VALUE(monitor) >= (mon_type_t) (value)); \ + MONITOR_VALUE(monitor) -= (mon_type_t) (value); \ + if (MONITOR_VALUE(monitor) < MONITOR_MIN_VALUE(monitor)) { \ + MONITOR_MIN_VALUE(monitor) = MONITOR_VALUE(monitor);\ + } \ + } + +/* Increment/decrement counter without checking the monitor on/off bit, which +could already be checked as a module group */ +#define MONITOR_INC_NOCHECK(monitor) \ + do { \ + MONITOR_VALUE(monitor)++; \ + if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \ + MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor);\ + } \ + } while (0) + +#define MONITOR_DEC_NOCHECK(monitor) \ + do { \ + MONITOR_VALUE(monitor)--; \ + if (MONITOR_VALUE(monitor) < MONITOR_MIN_VALUE(monitor)) { \ + MONITOR_MIN_VALUE(monitor) = MONITOR_VALUE(monitor);\ + } \ + } while (0) + +/** Directly set a monitor counter's value */ +#define MONITOR_SET(monitor, value) \ + if (MONITOR_IS_ON(monitor)) { \ + MONITOR_VALUE(monitor) = (mon_type_t) (value); \ + if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \ + MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor);\ + } \ + if (MONITOR_VALUE(monitor) < MONITOR_MIN_VALUE(monitor)) { \ + MONITOR_MIN_VALUE(monitor) = MONITOR_VALUE(monitor);\ + } \ + } + +/** Add time difference between now and input
"value" (in microseconds) to the +monitor counter +@monitor monitor to update for the time difference +@value the start time value; must be an lvalue, because the macro +overwrites it with the current ut_time_us() reading */ +#define MONITOR_INC_TIME_IN_MICRO_SECS(monitor, value) \ + if (MONITOR_IS_ON(monitor)) { \ + ullint old_time = (value); \ + value = ut_time_us(NULL); \ + MONITOR_VALUE(monitor) += (mon_type_t) (value - old_time);\ + } + +/** This macro updates 3 counters in one call. However, it only checks the +main/first monitor counter 'monitor', to see whether it is on or off to decide +whether to do the update. +@monitor the main monitor counter to update. It accounts for + the accumulative value for the counter. +@monitor_n_calls counter that counts number of times this macro is + called +@monitor_per_call counter that records the current and max value of + each incremental value +@value incremental value to record this time */ +#define MONITOR_INC_VALUE_CUMULATIVE( \ + monitor, monitor_n_calls, monitor_per_call, value) \ + if (MONITOR_IS_ON(monitor)) { \ + MONITOR_VALUE(monitor_n_calls)++; \ + MONITOR_VALUE(monitor_per_call) = (mon_type_t) (value); \ + if (MONITOR_VALUE(monitor_per_call) \ + > MONITOR_MAX_VALUE(monitor_per_call)) { \ + MONITOR_MAX_VALUE(monitor_per_call) = \ + (mon_type_t) (value); \ + } \ + MONITOR_VALUE(monitor) += (mon_type_t) (value); \ + if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \ + MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor);\ + } \ + } + +/** Directly set a monitor counter's value, and if the value +is monotonically increasing, only max value needs to be updated */ +#define MONITOR_SET_UPD_MAX_ONLY(monitor, value) \ + if (MONITOR_IS_ON(monitor)) { \ + MONITOR_VALUE(monitor) = (mon_type_t) (value); \ + if (MONITOR_VALUE(monitor) > MONITOR_MAX_VALUE(monitor)) { \ + MONITOR_MAX_VALUE(monitor) = MONITOR_VALUE(monitor);\ + } \ + } + +/** Some values such as log sequence number are monotonically increasing +numbers and do not need to record max/min values */ +#define MONITOR_SET_SIMPLE(monitor, value) \ + if 
(MONITOR_IS_ON(monitor)) { \ + MONITOR_VALUE(monitor) = (mon_type_t) (value); \ + } + +/** Reset the monitor value to MONITOR_INIT_ZERO_VALUE and the max/min values to MAX_RESERVED/MIN_RESERVED. The reset +operation would only be conducted when the counter is turned off (the macro itself does not check the on/off bit) */ +#define MONITOR_RESET_ALL(monitor) \ + do { \ + MONITOR_VALUE(monitor) = MONITOR_INIT_ZERO_VALUE; \ + MONITOR_MAX_VALUE(monitor) = MAX_RESERVED; \ + MONITOR_MIN_VALUE(monitor) = MIN_RESERVED; \ + MONITOR_VALUE_RESET(monitor) = MONITOR_INIT_ZERO_VALUE; \ + MONITOR_MAX_VALUE_START(monitor) = MAX_RESERVED; \ + MONITOR_MIN_VALUE_START(monitor) = MIN_RESERVED; \ + MONITOR_LAST_VALUE(monitor) = MONITOR_INIT_ZERO_VALUE; \ + MONITOR_FIELD(monitor, mon_start_time) = \ + MONITOR_INIT_ZERO_VALUE; \ + MONITOR_FIELD(monitor, mon_stop_time) = \ + MONITOR_INIT_ZERO_VALUE; \ + MONITOR_FIELD(monitor, mon_reset_time) = \ + MONITOR_INIT_ZERO_VALUE; \ + } while (0) + +/** The following four macros define the operations necessary to fetch and +consolidate information from existing system status variables. */ + +/** Save the passed-in value, minus MONITOR_VALUE_RESET(monitor), to the mon_start_value field of monitor +counters */ +#define MONITOR_SAVE_START(monitor, value) \ + (MONITOR_START_VALUE(monitor) = \ + (mon_type_t) (value) - MONITOR_VALUE_RESET(monitor)) + +/** Save the current MONITOR_VALUE to the mon_last_value field of monitor +counters, and add it to MONITOR_START_VALUE */ +#define MONITOR_SAVE_LAST(monitor) \ + do { \ + MONITOR_LAST_VALUE(monitor) = MONITOR_VALUE(monitor); \ + MONITOR_START_VALUE(monitor) += MONITOR_VALUE(monitor); \ + } while (0) + +/** Set monitor value to the difference of value and mon_start_value +compensated by mon_last_value if accumulated value is required.
*/ +#define MONITOR_SET_DIFF(monitor, value) \ + MONITOR_SET_UPD_MAX_ONLY(monitor, ((value) \ + - MONITOR_VALUE_RESET(monitor) \ + - MONITOR_FIELD(monitor, mon_start_value) \ + + MONITOR_FIELD(monitor, mon_last_value))) + +/****************************************************************//** +Get monitor's monitor_info_t by its monitor id (index into the +innodb_counter_info array) +@return pointer to the corresponding monitor_info_t, or NULL if no such +monitor */ +UNIV_INTERN +monitor_info_t* +srv_mon_get_info( +/*=============*/ + monitor_id_t monitor_id); /*!< in: id index into the + innodb_counter_info array */ +/****************************************************************//** +Get monitor's name by its monitor id (index into the +innodb_counter_info array) +@return corresponding monitor name, or NULL if no such +monitor */ +UNIV_INTERN +const char* +srv_mon_get_name( +/*=============*/ + monitor_id_t monitor_id); /*!< in: id index into the + innodb_counter_info array */ + +/****************************************************************//** +Turn on/off/reset monitor counters in a module. If module_id +is NUM_MONITOR then turn on all monitor counters. +This function is declared void: no status is returned to the caller +(NOTE(review): confirm how already-on monitors are reported). */ +UNIV_INTERN +void +srv_mon_set_module_control( +/*=======================*/ + monitor_id_t module_id, /*!< in: Module ID as in + monitor_counter_id. If it is + set to NUM_MONITOR, this means + we shall turn on all the counters */ + mon_option_t set_option); /*!< in: Turn on/off/reset the + counter */ +/****************************************************************//** +This function consolidates some existing server counters used +by "system status variables".
These existing system variables do not have +mechanism to start/stop and reset the counters, so we simulate these +controls by remembering the corresponding counter values when the +corresponding monitors are turned on/off/reset, and do appropriate +mathematics to deduct the actual value. */ +UNIV_INTERN +void +srv_mon_process_existing_counter( +/*=============================*/ + monitor_id_t monitor_id, /*!< in: the monitor's ID as in + monitor_counter_id */ + mon_option_t set_option); /*!< in: Turn on/off reset the + counter */ +/*************************************************************//** +This function is used to calculate the maximum counter value +since the start of monitor counter +@return max counter value since start. */ +UNIV_INLINE +mon_type_t +srv_mon_calc_max_since_start( +/*=========================*/ + monitor_id_t monitor); /*!< in: monitor id */ +/*************************************************************//** +This function is used to calculate the minimum counter value +since the start of monitor counter +@return min counter value since start. */ +UNIV_INLINE +mon_type_t +srv_mon_calc_min_since_start( +/*=========================*/ + monitor_id_t monitor); /*!< in: monitor id*/ +/*************************************************************//** +Reset a monitor, create a new base line with the current monitor +value. This baseline is recorded by MONITOR_VALUE_RESET(monitor) */ +UNIV_INTERN +void +srv_mon_reset( +/*==========*/ + monitor_id_t monitor); /*!< in: monitor id*/ +/*************************************************************//** +This function resets all values of a monitor counter */ +UNIV_INLINE +void +srv_mon_reset_all( +/*==============*/ + monitor_id_t monitor); /*!< in: monitor id*/ +/*************************************************************//** +Turn on monitor counters that are marked as default ON. 
*/ +UNIV_INTERN +void +srv_mon_default_on(void); +/*====================*/ + +#ifndef UNIV_NONINL +#include "srv0mon.ic" +#endif +#else /* !UNIV_HOTBACKUP */ +# define MONITOR_INC(x) ((void) 0) +# define MONITOR_DEC(x) ((void) 0) +#endif /* !UNIV_HOTBACKUP */ + +#endif diff --git a/storage/innobase/include/srv0mon.ic b/storage/innobase/include/srv0mon.ic new file mode 100644 index 00000000000..17411d77a8b --- /dev/null +++ b/storage/innobase/include/srv0mon.ic @@ -0,0 +1,113 @@ +/***************************************************************************** + +Copyright (c) 2010, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/*******************************************************************//** +@file include/srv0mon.ic +Server monitoring system + +Created 1/20/2010 Jimmy Yang +************************************************************************/ + +/*************************************************************//** +This function is used to calculate the maximum counter value +since the start of monitor counter +@return max counter value since start. 
*/ +UNIV_INLINE +mon_type_t +srv_mon_calc_max_since_start( +/*=========================*/ + monitor_id_t monitor) /*!< in: monitor id */ +{ + if (MONITOR_MAX_VALUE_START(monitor) == MAX_RESERVED) { + + /* MONITOR_MAX_VALUE_START has not yet been + initialized (it still holds MAX_RESERVED), so the max value since start is the + max count in MONITOR_MAX_VALUE. NOTE(review): MONITOR_VALUE_RESET is not added here, unlike in the branch below - confirm this is intended */ + MONITOR_MAX_VALUE_START(monitor) = + MONITOR_MAX_VALUE(monitor); + + } else if (MONITOR_MAX_VALUE(monitor) != MAX_RESERVED + && (MONITOR_MAX_VALUE(monitor) + + MONITOR_VALUE_RESET(monitor) + > MONITOR_MAX_VALUE_START(monitor))) { + + /* If the max value since reset (as specified + in MONITOR_MAX_VALUE) plus the reset value is + larger than MONITOR_MAX_VALUE_START, reset + MONITOR_MAX_VALUE_START to this new max value */ + MONITOR_MAX_VALUE_START(monitor) = + MONITOR_MAX_VALUE(monitor) + + MONITOR_VALUE_RESET(monitor); + } + + return(MONITOR_MAX_VALUE_START(monitor)); +} + +/*************************************************************//** +This function is used to calculate the minimum counter value +since the start of the monitor counter; MONITOR_MIN_VALUE_START is updated as a side effect +@return min counter value since start.
*/ +UNIV_INLINE +mon_type_t +srv_mon_calc_min_since_start( +/*=========================*/ + monitor_id_t monitor) /*!< in: monitor id */ +{ + if (MONITOR_MIN_VALUE_START(monitor) == MIN_RESERVED) { + + /* MONITOR_MIN_VALUE_START has not yet been + initialized (it still holds MIN_RESERVED), so the min value since start is the + min count in MONITOR_MIN_VALUE. NOTE(review): MONITOR_VALUE_RESET is not added here, unlike in the branch below - confirm this is intended */ + MONITOR_MIN_VALUE_START(monitor) = + MONITOR_MIN_VALUE(monitor); + + } else if (MONITOR_MIN_VALUE(monitor) != MIN_RESERVED + && (MONITOR_MIN_VALUE(monitor) + + MONITOR_VALUE_RESET(monitor) + < MONITOR_MIN_VALUE_START(monitor))) { + + /* If the min value since reset (as specified + in MONITOR_MIN_VALUE) plus the reset value is + less than MONITOR_MIN_VALUE_START, reset + MONITOR_MIN_VALUE_START to this new min value */ + MONITOR_MIN_VALUE_START(monitor) = + MONITOR_MIN_VALUE(monitor) + + MONITOR_VALUE_RESET(monitor); + } + + return(MONITOR_MIN_VALUE_START(monitor)); +} + +/*************************************************************//** +This function resets all values of a monitor counter via MONITOR_RESET_ALL. If the monitor is still on, nothing is reset and a message is printed to stderr instead */ +UNIV_INLINE +void +srv_mon_reset_all( +/*==============*/ + monitor_id_t monitor) /*!< in: monitor id */ +{ + /* Do not reset all counter values if monitor is still on. */ + if (MONITOR_IS_ON(monitor)) { + fprintf(stderr, "InnoDB: Cannot reset all values for " + "monitor counter %s while it is on. Please " + "turn it off and retry. \n", + srv_mon_get_name(monitor)); + } else { + MONITOR_RESET_ALL(monitor); + } +} diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index ed2f4672a99..99cff251e3c 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2011, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2012, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2008, 2009, Google Inc. Copyright (c) 2009, Percona Inc.
@@ -26,8 +26,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -43,15 +43,18 @@ Created 10/10/1995 Heikki Tuuri #include "univ.i" #ifndef UNIV_HOTBACKUP +#include "log0log.h" #include "sync0sync.h" #include "os0sync.h" #include "que0types.h" #include "trx0types.h" +#include "srv0conc.h" +#include "buf0checksum.h" extern const char* srv_main_thread_op_info; /** Prefix used by MySQL to indicate pre-5.1 table name encoding */ -extern const char srv_mysql50_table_name_prefix[9]; +extern const char srv_mysql50_table_name_prefix[10]; /* When this event is set the lock timeout and InnoDB monitor thread starts running */ @@ -66,6 +69,21 @@ extern os_event_t srv_timeout_event; /* The error monitor thread waits on this event. */ extern os_event_t srv_error_event; +/** The buffer pool dump/load thread waits on this event. */ +extern os_event_t srv_buf_dump_event; + +/** The buffer pool dump/load file name */ +#define SRV_BUF_DUMP_FILENAME_DEFAULT "ib_buffer_pool" +extern char* srv_buf_dump_filename; + +/** Boolean config knobs that tell InnoDB to dump the buffer pool at shutdown +and/or load it during startup. 
*/ +extern char srv_buffer_pool_dump_at_shutdown; +extern char srv_buffer_pool_load_at_startup; + +/* Whether to disable file system cache if it is defined */ +extern char srv_disable_sort_file_cache; + /* If the last data file is auto-extended, we add this many pages to it at a time */ #define SRV_AUTO_EXTEND_INCREMENT \ @@ -91,27 +109,33 @@ extern FILE* srv_misc_tmpfile; /* Server parameters which are read from the initfile */ extern char* srv_data_home; + #ifdef UNIV_LOG_ARCHIVE extern char* srv_arch_dir; #endif /* UNIV_LOG_ARCHIVE */ /** store to its own file each table created by an user; data dictionary tables are in the system tablespace 0 */ -#ifndef UNIV_HOTBACKUP extern my_bool srv_file_per_table; -#else -extern ibool srv_file_per_table; -#endif /* UNIV_HOTBACKUP */ +/** Sleep delay for threads waiting to enter InnoDB. In micro-seconds. */ +extern ulong srv_thread_sleep_delay; +#if defined(HAVE_ATOMIC_BUILTINS) +/** Maximum sleep delay (in micro-seconds), value of 0 disables it.*/ +extern ulong srv_adaptive_max_sleep_delay; +#endif /* HAVE_ATOMIC_BUILTINS */ + /** The file format to use on new *.ibd files. */ extern ulint srv_file_format; /** Whether to check file format during startup. A value of -DICT_TF_FORMAT_MAX + 1 means no checking ie. FALSE. The default is to +UNIV_FORMAT_MAX + 1 means no checking ie. FALSE. The default is to set it to the highest format we support. */ extern ulint srv_max_file_format_at_startup; /** Place locks to records only i.e. 
do not use next-key locking except on duplicate key checking and foreign key checking */ extern ibool srv_locks_unsafe_for_binlog; -#endif /* !UNIV_HOTBACKUP */ + +/* Variable specifying the FTS parallel sort buffer size */ +extern ulong srv_sort_buf_size; /* If this flag is TRUE, then we will use the native aio of the OS (provided we compiled Innobase with it in), otherwise we will @@ -120,7 +144,18 @@ Currently we support native aio on windows and linux */ extern my_bool srv_use_native_aio; #ifdef __WIN__ extern ibool srv_use_native_conditions; -#endif +#endif /* __WIN__ */ +#endif /* !UNIV_HOTBACKUP */ + +/** Server undo tablespaces directory, can be absolute path. */ +extern char* srv_undo_dir; + +/** Number of undo tablespaces to use. */ +extern ulong srv_undo_tablespaces; + +/* The number of undo segments to use */ +extern ulong srv_undo_logs; + extern ulint srv_n_data_files; extern char** srv_data_file_names; extern ulint* srv_data_file_sizes; @@ -136,7 +171,7 @@ extern ibool srv_created_new_raw; extern ulint srv_n_log_groups; extern ulint srv_n_log_files; -extern ulint srv_log_file_size; +extern ib_uint64_t srv_log_file_size; extern ulint srv_log_buffer_size; extern ulong srv_flush_log_at_trx_commit; extern char srv_adaptive_flushing; @@ -156,6 +191,12 @@ extern ibool srv_use_sys_malloc; #endif /* UNIV_HOTBACKUP */ extern ulint srv_buf_pool_size; /*!< requested size in bytes */ extern ulint srv_buf_pool_instances; /*!< requested number of buffer pool instances */ +extern ulong srv_n_page_hash_locks; /*!< number of locks to + protect buf_pool->page_hash */ +extern ulong srv_LRU_scan_depth; /*!< Scan depth for LRU + flush batch */ +extern my_bool srv_flush_neighbors; /*!< whether or not to flush + neighbors of a block */ extern ulint srv_buf_pool_old_size; /*!< previously requested size */ extern ulint srv_buf_pool_curr_size; /*!< current size in bytes */ extern ulint srv_mem_pool_size; @@ -172,7 +213,7 @@ extern ulong srv_io_capacity; /* Returns the number 
of IO operations that is X percent of the capacity. PCT_IO(5) -> returns the number of IO operations that is 5% of the max where max is srv_io_capacity. */ -#define PCT_IO(p) ((ulong) (srv_io_capacity * ((double) p / 100.0))) +#define PCT_IO(p) ((ulong) (srv_io_capacity * ((double) (p) / 100.0))) /* The "innodb_stats_method" setting, decides how InnoDB is going to treat NULL value when collecting statistics. It is not defined @@ -194,28 +235,26 @@ extern ulint srv_max_n_open_files; extern ulint srv_max_dirty_pages_pct; extern ulint srv_force_recovery; -extern ulong srv_thread_concurrency; - -extern ulint srv_max_n_threads; -extern lint srv_conc_n_threads; - -extern ulint srv_fast_shutdown; /* If this is 1, do not do a - purge and index buffer merge. - If this 2, do not even flush the - buffer pool to data files at the - shutdown: we effectively 'crash' - InnoDB (but lose no committed - transactions). */ +extern ulint srv_fast_shutdown; /*!< If this is 1, do not do a + purge and index buffer merge. + If this 2, do not even flush the + buffer pool to data files at the + shutdown: we effectively 'crash' + InnoDB (but lose no committed + transactions). 
*/ extern ibool srv_innodb_status; -extern unsigned long long srv_stats_sample_pages; +extern unsigned long long srv_stats_transient_sample_pages; +extern unsigned long long srv_stats_persistent_sample_pages; extern ibool srv_use_doublewrite_buf; -extern ibool srv_use_checksums; +extern ulong srv_doublewrite_batch_size; +extern ulong srv_checksum_algorithm; extern ulong srv_max_buf_pool_modified_pct; extern ulong srv_max_purge_lag; +extern ulong srv_max_purge_lag_delay; extern ulong srv_replication_delay; /*-------------------------------------------*/ @@ -229,19 +268,29 @@ extern ibool srv_print_innodb_monitor; extern ibool srv_print_innodb_lock_monitor; extern ibool srv_print_innodb_tablespace_monitor; extern ibool srv_print_verbose_log; +#define DEPRECATED_MSG_INNODB_TABLE_MONITOR \ + "Using innodb_table_monitor is deprecated and it may be removed " \ + "in future releases. Please use the InnoDB INFORMATION_SCHEMA " \ + "tables instead, see " REFMAN "innodb-i_s-tables.html" extern ibool srv_print_innodb_table_monitor; extern ibool srv_lock_timeout_active; extern ibool srv_monitor_active; extern ibool srv_error_monitor_active; +/* TRUE during the lifetime of the buffer pool dump/load thread */ +extern ibool srv_buf_dump_thread_active; + extern ulong srv_n_spin_wait_rounds; extern ulong srv_n_free_tickets_to_enter; extern ulong srv_thread_sleep_delay; extern ulong srv_spin_wait_delay; extern ibool srv_priority_boost; +extern ulint srv_n_lock_wait_count; + extern ulint srv_truncated_status_writes; +extern ulint srv_available_undo_logs; extern ulint srv_mem_pool_size; extern ulint srv_lock_table_size; @@ -260,16 +309,13 @@ extern ibool srv_print_latch_waits; # define srv_print_latch_waits FALSE #endif /* UNIV_DEBUG */ -extern ulint srv_activity_count; extern ulint srv_fatal_semaphore_wait_threshold; -#define SRV_SEMAPHORE_WAIT_EXTENSION 7200 extern ulint srv_dml_needed_delay; -extern mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs, - query 
threads, and lock table: we allocate - it from dynamic memory to get it to the - same DRAM page as other hotspot semaphores */ -#define kernel_mutex (*kernel_mutex_temp) +#ifndef HAVE_ATOMIC_BUILTINS +/** Mutex protecting some server global variables. */ +extern mutex_t server_mutex; +#endif /* !HAVE_ATOMIC_BUILTINS */ #define SRV_MAX_N_IO_THREADS 130 @@ -285,7 +331,7 @@ extern ulint srv_log_write_requests; extern ulint srv_log_writes; /* amount of data written to the log files in bytes */ -extern ulint srv_os_log_written; +extern lsn_t srv_os_log_written; /* amount of writes being done to the log files */ extern ulint srv_os_log_pending_writes; @@ -300,8 +346,8 @@ extern ulong srv_n_purge_threads; /* the number of pages to purge in one batch */ extern ulong srv_purge_batch_size; -/* the number of rollback segments to use */ -extern ulong srv_rollback_segments; +/* the number of sync wait arrays */ +extern ulong srv_sync_array_size; /* variable that counts amount of data read in total (in bytes) */ extern ulint srv_data_read; @@ -333,20 +379,24 @@ extern ulint srv_buf_pool_flushed; reading of a disk page */ extern ulint srv_buf_pool_reads; +/* print all user-level transactions deadlocks to mysqld stderr */ +extern my_bool srv_print_all_deadlocks; + /** Status variables to be passed to MySQL */ typedef struct export_var_struct export_struc; -/** Status variables to be passed to MySQL */ -extern export_struc export_vars; +/** Thread slot in the thread table */ +typedef struct srv_slot_struct srv_slot_t; -/** The server system */ -typedef struct srv_sys_struct srv_sys_t; +/** Thread table is an array of slots */ +typedef srv_slot_t srv_table_t; -/** The server system */ -extern srv_sys_t* srv_sys; +/** Status variables to be passed to MySQL */ +extern export_struc export_vars; # ifdef UNIV_PFS_THREAD /* Keys to register InnoDB threads with performance schema */ +extern mysql_pfs_key_t buf_page_cleaner_thread_key; extern mysql_pfs_key_t trx_rollback_clean_thread_key; 
extern mysql_pfs_key_t io_handler_thread_key; extern mysql_pfs_key_t srv_lock_timeout_thread_key; @@ -359,20 +409,14 @@ extern mysql_pfs_key_t srv_purge_thread_key; schema */ # define pfs_register_thread(key) \ do { \ - if (PSI_server) { \ - struct PSI_thread* psi = PSI_server->new_thread(key, NULL, 0);\ - if (psi) { \ - PSI_server->set_thread(psi); \ - } \ - } \ + struct PSI_thread* psi = PSI_CALL(new_thread)(key, NULL, 0);\ + PSI_CALL(set_thread)(psi); \ } while (0) /* This macro delist the current thread from performance schema */ # define pfs_delete_thread() \ do { \ - if (PSI_server) { \ - PSI_server->delete_current_thread(); \ - } \ + PSI_CALL(delete_current_thread)(); \ } while (0) # endif /* UNIV_PFS_THREAD */ @@ -445,10 +489,13 @@ typedef enum srv_stats_method_name_enum srv_stats_method_name_t; #ifndef UNIV_HOTBACKUP /** Types of threads existing in the system. */ enum srv_thread_type { - SRV_WORKER = 0, /**< threads serving parallelized queries and - queries released from lock wait */ - SRV_MASTER /**< the master thread, (whose type number must - be biggest) */ + SRV_NONE, /*!< None */ + SRV_WORKER, /*!< threads serving parallelized + queries and queries released from + lock wait */ + SRV_PURGE, /*!< Purge coordinator thread */ + SRV_MASTER /*!< the master thread, (whose type + number must be biggest) */ }; /*********************************************************************//** @@ -478,21 +525,6 @@ void srv_general_init(void); /*==================*/ /*********************************************************************//** -Gets the number of threads in the system. -@return sum of srv_n_threads[] */ -UNIV_INTERN -ulint -srv_get_n_threads(void); -/*===================*/ -/*********************************************************************//** -Check whether thread type has reserved a slot. 
-@return slot number or UNDEFINED if not found*/ -UNIV_INTERN -ulint -srv_thread_has_reserved_slot( -/*=========================*/ - enum srv_thread_type type); /*!< in: thread type to check */ -/*********************************************************************//** Sets the info describing an i/o thread current state. */ UNIV_INTERN void @@ -501,32 +533,16 @@ srv_set_io_thread_op_info( ulint i, /*!< in: the 'segment' of the i/o thread */ const char* str); /*!< in: constant char string describing the state */ -/*********************************************************************//** -Releases threads of the type given from suspension in the thread table. -NOTE! The server mutex has to be reserved by the caller! -@return number of threads released: this may be less than n if not -enough threads were suspended at the moment */ -UNIV_INTERN -ulint -srv_release_threads( -/*================*/ - enum srv_thread_type type, /*!< in: thread type */ - ulint n); /*!< in: number of threads to release */ -/*********************************************************************//** -The master thread controlling the server. -@return a dummy parameter */ -UNIV_INTERN -os_thread_ret_t -srv_master_thread( -/*==============*/ - void* arg); /*!< in: a dummy parameter required by - os_thread_create */ /*******************************************************************//** -Wakes up the purge thread if it's not already awake. */ +Tells the purge thread that there has been activity in the database +and wakes up the purge thread if it is suspended (not sleeping). Note +that there is a small chance that the purge thread stays suspended +(we do not protect our operation with the srv_sys_t:mutex, for +performance reasons). 
*/ UNIV_INTERN void -srv_wake_purge_thread(void); -/*=======================*/ +srv_wake_purge_thread_if_not_active(void); +/*=====================================*/ /*******************************************************************//** Tells the Innobase server that there has been activity in the database and wakes up the master thread if it is suspended (not sleeping). Used @@ -543,150 +559,159 @@ UNIV_INTERN void srv_wake_master_thread(void); /*========================*/ -/*******************************************************************//** -Tells the purge thread that there has been activity in the database -and wakes up the purge thread if it is suspended (not sleeping). Note -that there is a small chance that the purge thread stays suspended -(we do not protect our operation with the kernel mutex, for -performace reasons). */ +/******************************************************************//** +Outputs to a file the output of the InnoDB Monitor. +@return FALSE if not all information printed +due to failure to obtain necessary mutex */ UNIV_INTERN -void -srv_wake_purge_thread_if_not_active(void); -/*=====================================*/ -/*********************************************************************//** -Puts an OS thread to wait if there are too many concurrent threads -(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. 
*/ +ibool +srv_printf_innodb_monitor( +/*======================*/ + FILE* file, /*!< in: output stream */ + ibool nowait, /*!< in: whether to wait for the + lock_sys_t::mutex */ + ulint* trx_start, /*!< out: file position of the start of + the list of active transactions */ + ulint* trx_end); /*!< out: file position of the end of + the list of active transactions */ + +/******************************************************************//** +Function to pass InnoDB status variables to MySQL */ UNIV_INTERN void -srv_conc_enter_innodb( -/*==================*/ - trx_t* trx); /*!< in: transaction object associated with the - thread */ -/*********************************************************************//** -This lets a thread enter InnoDB regardless of the number of threads inside -InnoDB. This must be called when a thread ends a lock wait. */ +srv_export_innodb_status(void); +/*==========================*/ +/*******************************************************************//** +Get current server activity count. We don't hold srv_sys::mutex while +reading this value as it is only used in heuristics. +@return activity count. */ UNIV_INTERN -void -srv_conc_force_enter_innodb( +ulint +srv_get_activity_count(void); /*========================*/ - trx_t* trx); /*!< in: transaction object associated with the - thread */ -/*********************************************************************//** -This must be called when a thread exits InnoDB in a lock wait or at the -end of an SQL statement. */ +/*******************************************************************//** +Check if there has been any activity. +@return FALSE if no change in activity counter. */ UNIV_INTERN -void -srv_conc_force_exit_innodb( -/*=======================*/ - trx_t* trx); /*!< in: transaction object associated with the - thread */ -/*********************************************************************//** -This must be called when a thread exits InnoDB. 
*/ +ibool +srv_check_activity( +/*===============*/ + ulint old_activity_count); /*!< old activity count */ +/******************************************************************//** +Increment the server activity counter. */ UNIV_INTERN void -srv_conc_exit_innodb( -/*=================*/ - trx_t* trx); /*!< in: transaction object associated with the - thread */ -/***************************************************************//** -Puts a MySQL OS thread to wait for a lock to be released. If an error -occurs during the wait trx->error_state associated with thr is -!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK -are possible errors. DB_DEADLOCK is returned if selective deadlock -resolution chose this transaction as a victim. */ +srv_inc_activity_count(void); +/*=========================*/ + +/**********************************************************************//** +Enqueues a task to server task queue and releases a worker thread, if there +is a suspended one. */ UNIV_INTERN void -srv_suspend_mysql_thread( +srv_que_task_enqueue_low( /*=====================*/ - que_thr_t* thr); /*!< in: query thread associated with the MySQL - OS thread */ -/********************************************************************//** -Releases a MySQL OS thread waiting for a lock to be released, if the -thread is already suspended. */ + que_thr_t* thr); /*!< in: query thread */ + +/**********************************************************************//** +Check whether any background thread is active. If so, return the thread +type. +@return SRV_NONE if all are are suspended or have exited, thread +type if any are still active. 
*/ UNIV_INTERN -void -srv_release_mysql_thread_if_suspended( -/*==================================*/ - que_thr_t* thr); /*!< in: query thread associated with the - MySQL OS thread */ +enum srv_thread_type +srv_get_active_thread_type(void); +/*============================*/ + +extern "C" { + /*********************************************************************//** -A thread which wakes up threads whose lock wait may have lasted too long. +A thread which prints the info output by various InnoDB monitors. @return a dummy parameter */ UNIV_INTERN os_thread_ret_t -srv_lock_timeout_thread( -/*====================*/ +DECLARE_THREAD(srv_monitor_thread)( +/*===============================*/ void* arg); /*!< in: a dummy parameter required by os_thread_create */ + /*********************************************************************//** -A thread which prints the info output by various InnoDB monitors. +The master thread controlling the server. @return a dummy parameter */ UNIV_INTERN os_thread_ret_t -srv_monitor_thread( -/*===============*/ +DECLARE_THREAD(srv_master_thread)( +/*==============================*/ void* arg); /*!< in: a dummy parameter required by os_thread_create */ + /************************************************************************* A thread which prints warnings about semaphore waits which have lasted too long. These can be used to track bugs which cause hangs. @return a dummy parameter */ UNIV_INTERN os_thread_ret_t -srv_error_monitor_thread( -/*=====================*/ +DECLARE_THREAD(srv_error_monitor_thread)( +/*=====================================*/ void* arg); /*!< in: a dummy parameter required by os_thread_create */ -/******************************************************************//** -Outputs to a file the output of the InnoDB Monitor. 
-@return FALSE if not all information printed -due to failure to obtain necessary mutex */ -UNIV_INTERN -ibool -srv_printf_innodb_monitor( -/*======================*/ - FILE* file, /*!< in: output stream */ - ibool nowait, /*!< in: whether to wait for kernel mutex */ - ulint* trx_start, /*!< out: file position of the start of - the list of active transactions */ - ulint* trx_end); /*!< out: file position of the end of - the list of active transactions */ -/******************************************************************//** -Function to pass InnoDB status variables to MySQL */ +/*********************************************************************//** +Purge coordinator thread that schedules the purge tasks. +@return a dummy parameter */ UNIV_INTERN -void -srv_export_innodb_status(void); -/*==========================*/ +os_thread_ret_t +DECLARE_THREAD(srv_purge_coordinator_thread)( +/*=========================================*/ + void* arg __attribute__((unused))); /*!< in: a dummy parameter + required by os_thread_create */ /*********************************************************************//** -Asynchronous purge thread. +Worker thread that reads tasks from the work queue and executes them. @return a dummy parameter */ UNIV_INTERN os_thread_ret_t -srv_purge_thread( -/*=============*/ - void* arg __attribute__((unused))); /*!< in: a dummy parameter - required by os_thread_create */ +DECLARE_THREAD(srv_worker_thread)( +/*==============================*/ + void* arg __attribute__((unused))); /*!< in: a dummy parameter + required by os_thread_create */ +} /* extern "C" */ /**********************************************************************//** -Enqueues a task to server task queue and releases a worker thread, if there -is a suspended one. */ +Get count of tasks in the queue. 
+@return number of tasks in queue */ UNIV_INTERN -void -srv_que_task_enqueue_low( -/*=====================*/ - que_thr_t* thr); /*!< in: query thread */ +ulint +srv_get_task_queue_length(void); +/*===========================*/ -/**********************************************************************//** -Check whether any background thread is active. If so, return the thread -type. -@return ULINT_UNDEFINED if all are are suspended or have exited, thread -type if any are still active. */ +/*********************************************************************//** +Releases threads of the type given from suspension in the thread table. +NOTE! The server mutex has to be reserved by the caller! +@return number of threads released: this may be less than n if not +enough threads were suspended at the moment */ UNIV_INTERN ulint -srv_get_active_thread_type(void); -/*============================*/ +srv_release_threads( +/*================*/ + enum srv_thread_type type, /*!< in: thread type */ + ulint n); /*!< in: number of threads to release */ + +/**********************************************************************//** +Check whether any background thread are active. If so print which thread +is active. Send the threads wakeup signal. +@return name of thread that is active or NULL */ +UNIV_INTERN +const char* +srv_any_background_threads_are_active(void); +/*=======================================*/ + +/**********************************************************************//** +Wakeup the purge threads. 
*/ +UNIV_INTERN +void +srv_purge_wakeup(void); +/*==================*/ /** Status variables to be passed to MySQL */ struct export_var_struct{ @@ -698,6 +723,8 @@ struct export_var_struct{ ulint innodb_data_writes; /*!< I/O write requests */ ulint innodb_data_written; /*!< Data bytes written */ ulint innodb_data_reads; /*!< I/O read requests */ + char innodb_buffer_pool_dump_status[512];/*!< Buf pool dump status */ + char innodb_buffer_pool_load_status[512];/*!< Buf pool load status */ ulint innodb_buffer_pool_pages_total; /*!< Buffer pool size */ ulint innodb_buffer_pool_pages_data; /*!< Data pages */ ulint innodb_buffer_pool_pages_dirty; /*!< Dirty data pages */ @@ -720,7 +747,7 @@ struct export_var_struct{ ulint innodb_log_waits; /*!< srv_log_waits */ ulint innodb_log_write_requests; /*!< srv_log_write_requests */ ulint innodb_log_writes; /*!< srv_log_writes */ - ulint innodb_os_log_written; /*!< srv_os_log_written */ + lsn_t innodb_os_log_written; /*!< srv_os_log_written */ ulint innodb_os_log_fsyncs; /*!< fil_n_log_flushes */ ulint innodb_os_log_pending_writes; /*!< srv_os_log_pending_writes */ ulint innodb_os_log_pending_fsyncs; /*!< fil_n_pending_log_flushes */ @@ -741,26 +768,38 @@ struct export_var_struct{ ulint innodb_rows_inserted; /*!< srv_n_rows_inserted */ ulint innodb_rows_updated; /*!< srv_n_rows_updated */ ulint innodb_rows_deleted; /*!< srv_n_rows_deleted */ + ulint innodb_num_open_files; /*!< fil_n_file_opened */ ulint innodb_truncated_status_writes; /*!< srv_truncated_status_writes */ + ulint innodb_available_undo_logs; /*!< srv_available_undo_logs */ }; -/** Thread slot in the thread table */ -typedef struct srv_slot_struct srv_slot_t; - -/** Thread table is an array of slots */ -typedef srv_slot_t srv_table_t; - -/** The server system struct */ -struct srv_sys_struct{ - srv_table_t* threads; /*!< server thread table */ - UT_LIST_BASE_NODE_T(que_thr_t) - tasks; /*!< task queue */ +/** Thread slot in the thread table. 
*/ +struct srv_slot_struct{ + srv_thread_type type; /*!< thread type: user, + utility etc. */ + ibool in_use; /*!< TRUE if this slot + is in use */ + ibool suspended; /*!< TRUE if the thread is + waiting for the event of this + slot */ + ib_time_t suspend_time; /*!< time when the thread was + suspended. Initialized by + lock_wait_table_reserve_slot() + for lock wait */ + ulong wait_timeout; /*!< wait time that if exceeded + the thread will be timed out. + Initialized by + lock_wait_table_reserve_slot() + for lock wait */ + os_event_t event; /*!< event used in suspending + the thread when it has nothing + to do */ + que_thr_t* thr; /*!< suspended query thread + (only used for user threads) */ }; -extern ulint srv_n_threads_active[]; #else /* !UNIV_HOTBACKUP */ # define srv_use_adaptive_hash_indexes FALSE -# define srv_use_checksums TRUE # define srv_use_native_aio FALSE # define srv_force_recovery 0UL # define srv_set_io_thread_op_info(t,info) ((void) 0) diff --git a/storage/innobase/include/srv0srv.ic b/storage/innobase/include/srv0srv.ic index 8a1a678a016..53405c06f97 100644 --- a/storage/innobase/include/srv0srv.ic +++ b/storage/innobase/include/srv0srv.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/srv0start.h b/storage/innobase/include/srv0start.h index 796d2cade3b..9d948675011 100644 --- a/storage/innobase/include/srv0start.h +++ b/storage/innobase/include/srv0start.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -27,8 +27,15 @@ Created 10/10/1995 Heikki Tuuri #define srv0start_h #include "univ.i" +#include "log0log.h" #include "ut0byte.h" +#ifdef __WIN__ +#define SRV_PATH_SEPARATOR '\\' +#else +#define SRV_PATH_SEPARATOR '/' +#endif + /*********************************************************************//** Normalizes a directory path for Windows: converts slashes to backslashes. 
*/ UNIV_INTERN @@ -85,11 +92,19 @@ Shuts down the Innobase database. UNIV_INTERN int innobase_shutdown_for_mysql(void); + +/******************************************************************** +Signal all per-table background threads to shutdown, and wait for them to do +so. */ + +void +srv_shutdown_table_bg_threads(void); + /*=============================*/ /** Log sequence number at shutdown */ -extern ib_uint64_t srv_shutdown_lsn; +extern lsn_t srv_shutdown_lsn; /** Log sequence number immediately after startup */ -extern ib_uint64_t srv_start_lsn; +extern lsn_t srv_start_lsn; #ifdef HAVE_DARWIN_THREADS /** TRUE if the F_FULLFSYNC option is available */ @@ -113,6 +128,11 @@ enum srv_shutdown_state { SRV_SHUTDOWN_NONE = 0, /*!< Database running normally */ SRV_SHUTDOWN_CLEANUP, /*!< Cleaning up in logs_empty_and_mark_files_at_shutdown() */ + SRV_SHUTDOWN_FLUSH_PHASE,/*!< At this phase the master and the + purge threads must have completed their + work. Once we enter this phase the + page_cleaner can clean up the buffer + pool and exit */ SRV_SHUTDOWN_LAST_PHASE,/*!< Last phase after ensuring that the buffer pool can be freed: flush all file spaces and close all files */ diff --git a/storage/innobase/include/sync0arr.h b/storage/innobase/include/sync0arr.h index 6e931346238..56f9ff78c49 100644 --- a/storage/innobase/include/sync0arr.h +++ b/storage/innobase/include/sync0arr.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -36,32 +36,6 @@ typedef struct sync_cell_struct sync_cell_t; /** Synchronization wait array */ typedef struct sync_array_struct sync_array_t; -/** Parameters for sync_array_create() @{ */ -#define SYNC_ARRAY_OS_MUTEX 1 /*!< protected by os_mutex_t */ -#define SYNC_ARRAY_MUTEX 2 /*!< protected by mutex_t */ -/* @} */ - -/*******************************************************************//** -Creates a synchronization wait array. It is protected by a mutex -which is automatically reserved when the functions operating on it -are called. -@return own: created wait array */ -UNIV_INTERN -sync_array_t* -sync_array_create( -/*==============*/ - ulint n_cells, /*!< in: number of cells in the array - to create */ - ulint protection); /*!< in: either SYNC_ARRAY_OS_MUTEX or - SYNC_ARRAY_MUTEX: determines the type - of mutex protecting the data structure */ -/******************************************************************//** -Frees the resources in a wait array. */ -UNIV_INTERN -void -sync_array_free( -/*============*/ - sync_array_t* arr); /*!< in, own: sync wait array */ /******************************************************************//** Reserves a wait array cell for waiting for an object. The event of the cell is reset to nonsignalled state. */ @@ -99,9 +73,9 @@ sync_array_free_cell( Note that one of the wait objects was signalled. 
*/ UNIV_INTERN void -sync_array_object_signalled( -/*========================*/ - sync_array_t* arr); /*!< in: wait array */ +sync_array_object_signalled(void); +/*=============================*/ + /**********************************************************************//** If the wakeup algorithm does not work perfectly at semaphore relases, this function will do the waking (see the comment in mutex_exit). This @@ -132,11 +106,30 @@ sync_array_validate( Prints info of the wait array. */ UNIV_INTERN void -sync_array_print_info( +sync_array_print( +/*=============*/ + FILE* file); /*!< in: file where to print */ + +/**********************************************************************//** +Create the primary system wait array(s), they are protected by an OS mutex */ +UNIV_INTERN +void +sync_array_init( +/*============*/ + ulint n_threads); /*!< in: Number of slots to create */ +/**********************************************************************//** +Close sync array wait sub-system. */ +UNIV_INTERN +void +sync_array_close(void); /*==================*/ - FILE* file, /*!< in: file where to print */ - sync_array_t* arr); /*!< in: wait array */ +/**********************************************************************//** +Get an instance of the sync wait array. */ +UNIV_INTERN +sync_array_t* +sync_array_get(void); +/*================*/ #ifndef UNIV_NONINL #include "sync0arr.ic" diff --git a/storage/innobase/include/sync0arr.ic b/storage/innobase/include/sync0arr.ic index bf57f5b2dc2..0114a1ff5a2 100644 --- a/storage/innobase/include/sync0arr.ic +++ b/storage/innobase/include/sync0arr.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -24,4 +24,3 @@ Inline code Created 9/5/1995 Heikki Tuuri *******************************************************/ - diff --git a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h index 2cab266d86a..b0c21d0c76b 100644 --- a/storage/innobase/include/sync0rw.h +++ b/storage/innobase/include/sync0rw.h @@ -18,8 +18,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -117,12 +117,17 @@ extern mysql_pfs_key_t buf_block_lock_key; extern mysql_pfs_key_t buf_block_debug_latch_key; # endif /* UNIV_SYNC_DEBUG */ extern mysql_pfs_key_t dict_operation_lock_key; -extern mysql_pfs_key_t fil_space_latch_key; extern mysql_pfs_key_t checkpoint_lock_key; +extern mysql_pfs_key_t fil_space_latch_key; +extern mysql_pfs_key_t fts_cache_rw_lock_key; +extern mysql_pfs_key_t fts_cache_init_rw_lock_key; +extern mysql_pfs_key_t index_tree_rw_lock_key; extern mysql_pfs_key_t trx_i_s_cache_lock_key; extern mysql_pfs_key_t trx_purge_latch_key; extern mysql_pfs_key_t index_tree_rw_lock_key; extern mysql_pfs_key_t dict_table_stats_latch_key; +extern mysql_pfs_key_t trx_sys_rw_lock_key; +extern mysql_pfs_key_t hash_table_rw_lock_key; #endif /* UNIV_PFS_RWLOCK */ @@ -154,9 +159,6 @@ unlocking, not the corresponding function. */ # define rw_lock_s_lock(M) \ rw_lock_s_lock_func((M), 0, __FILE__, __LINE__) -# define rw_lock_s_lock_inline(M, P, F, L) \ - rw_lock_s_lock_func((M), (P), (F), (L)) - # define rw_lock_s_lock_gen(M, P) \ rw_lock_s_lock_func((M), (P), __FILE__, __LINE__) @@ -173,18 +175,12 @@ unlocking, not the corresponding function. 
*/ # define rw_lock_x_lock(M) \ rw_lock_x_lock_func((M), 0, __FILE__, __LINE__) -# define rw_lock_x_lock_inline(M, P, F, L) \ - rw_lock_x_lock_func((M), (P), (F), (L)) - # define rw_lock_x_lock_gen(M, P) \ rw_lock_x_lock_func((M), (P), __FILE__, __LINE__) # define rw_lock_x_lock_nowait(M) \ rw_lock_x_lock_func_nowait((M), __FILE__, __LINE__) -# define rw_lock_x_lock_func_nowait_inline(M, F, L) \ - rw_lock_x_lock_func_nowait((M), (F), (L)) - # ifdef UNIV_SYNC_DEBUG # define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(P, L) # else @@ -216,9 +212,6 @@ unlocking, not the corresponding function. */ # define rw_lock_s_lock(M) \ pfs_rw_lock_s_lock_func((M), 0, __FILE__, __LINE__) -# define rw_lock_s_lock_inline(M, P, F, L) \ - pfs_rw_lock_s_lock_func((M), (P), (F), (L)) - # define rw_lock_s_lock_gen(M, P) \ pfs_rw_lock_s_lock_func((M), (P), __FILE__, __LINE__) @@ -234,18 +227,12 @@ unlocking, not the corresponding function. */ # define rw_lock_x_lock(M) \ pfs_rw_lock_x_lock_func((M), 0, __FILE__, __LINE__) -# define rw_lock_x_lock_inline(M, P, F, L) \ - pfs_rw_lock_x_lock_func((M), (P), (F), (L)) - # define rw_lock_x_lock_gen(M, P) \ pfs_rw_lock_x_lock_func((M), (P), __FILE__, __LINE__) # define rw_lock_x_lock_nowait(M) \ pfs_rw_lock_x_lock_func_nowait((M), __FILE__, __LINE__) -# define rw_lock_x_lock_func_nowait_inline(M, F, L) \ - pfs_rw_lock_x_lock_func_nowait((M), (F), (L)) - # ifdef UNIV_SYNC_DEBUG # define rw_lock_x_unlock_gen(L, P) pfs_rw_lock_x_unlock_func(P, L) # else @@ -419,22 +406,6 @@ rw_lock_x_lock_move_ownership( rw_lock_t* lock); /*!< in: lock which was x-locked in the buffer read */ /******************************************************************//** -Releases a shared mode lock when we know there are no waiters and none -else will access the lock during the time this function is executed. 
*/ -UNIV_INLINE -void -rw_lock_s_unlock_direct( -/*====================*/ - rw_lock_t* lock); /*!< in/out: rw-lock */ -/******************************************************************//** -Releases an exclusive mode lock when we know there are no waiters, and -none else will access the lock durint the time this function is executed. */ -UNIV_INLINE -void -rw_lock_x_unlock_direct( -/*====================*/ - rw_lock_t* lock); /*!< in/out: rw-lock */ -/******************************************************************//** Returns the value of writer_count for the lock. Does not reserve the lock mutex, so the caller must be sure it is not changed during the call. @return value of writer_count */ @@ -607,7 +578,7 @@ struct rw_lock_struct { /*!< Thread id of writer thread. Is only guaranteed to have sane and non-stale value iff recursive flag is set. */ - os_event_t event; /*!< Used by sync0arr.c for thread queueing */ + os_event_t event; /*!< Used by sync0arr.cc for thread queueing */ os_event_t wait_ex_event; /*!< Event for next-writer to wait on. A thread must decrement lock_word before waiting. */ @@ -689,9 +660,6 @@ rw_lock_s_lock_gen() rw_lock_s_lock_nowait() rw_lock_s_unlock_gen() rw_lock_free() - -Two function APIs rw_lock_x_unlock_direct() and rw_lock_s_unlock_direct() -do not have any caller/user, they are not instrumented. */ #ifdef UNIV_PFS_RWLOCK diff --git a/storage/innobase/include/sync0rw.ic b/storage/innobase/include/sync0rw.ic index a5a7cda14f9..eab89e2619e 100644 --- a/storage/innobase/include/sync0rw.ic +++ b/storage/innobase/include/sync0rw.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. 
Portions of this file contain modifications contributed and copyrighted by @@ -18,8 +18,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -90,7 +90,7 @@ rw_lock_set_waiter_flag( rw_lock_t* lock) /*!< in/out: rw-lock */ { #ifdef INNODB_RW_LOCKS_USE_ATOMICS - (void) os_compare_and_swap_ulint(&lock->waiters, 0, 1); + os_compare_and_swap_ulint(&lock->waiters, 0, 1); #else /* INNODB_RW_LOCKS_USE_ATOMICS */ lock->waiters = 1; #endif /* INNODB_RW_LOCKS_USE_ATOMICS */ @@ -107,7 +107,7 @@ rw_lock_reset_waiter_flag( rw_lock_t* lock) /*!< in/out: rw-lock */ { #ifdef INNODB_RW_LOCKS_USE_ATOMICS - (void) os_compare_and_swap_ulint(&lock->waiters, 1, 0); + os_compare_and_swap_ulint(&lock->waiters, 1, 0); #else /* INNODB_RW_LOCKS_USE_ATOMICS */ lock->waiters = 0; #endif /* INNODB_RW_LOCKS_USE_ATOMICS */ @@ -131,7 +131,7 @@ rw_lock_get_writer( } else if (((-lock_word) % X_LOCK_DECR) == 0) { return(RW_LOCK_EX); } else { - ut_ad(lock_word > -X_LOCK_DECR); + ut_ad(lock_word > -X_LOCK_DECR); return(RW_LOCK_WAIT_EX); } } @@ -200,7 +200,7 @@ rw_lock_lock_word_decr( ulint amount) /*!< in: amount to decrement */ { #ifdef INNODB_RW_LOCKS_USE_ATOMICS - lint local_lock_word = lock->lock_word; + lint local_lock_word = lock->lock_word; while (local_lock_word > 0) { if (os_compare_and_swap_lint(&lock->lock_word, local_lock_word, @@ -244,7 +244,7 @@ rw_lock_lock_word_incr( mutex_exit(&(lock->mutex)); - return(local_lock_word); + return(local_lock_word); #endif /* 
INNODB_RW_LOCKS_USE_ATOMICS */ } @@ -308,7 +308,6 @@ rw_lock_s_lock_low( const char* file_name, /*!< in: file name where lock requested */ ulint line) /*!< in: line where requested */ { - /* TODO: study performance of UNIV_LIKELY branch prediction hints. */ if (!rw_lock_lock_word_decr(lock, 1)) { /* Locking did not succeed */ return(FALSE); @@ -318,7 +317,7 @@ rw_lock_s_lock_low( rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name, line); #endif /* These debugging values are not set safely: they may be incorrect - or even refer to a line that is invalid for the file name. */ + or even refer to a line that is invalid for the file name. */ lock->last_s_file_name = file_name; lock->last_s_line = line; @@ -409,7 +408,6 @@ rw_lock_s_lock_func( ut_ad(!rw_lock_own(lock, RW_LOCK_EX)); #endif /* UNIV_SYNC_DEBUG */ - /* TODO: study performance of UNIV_LIKELY branch prediction hints. */ if (rw_lock_s_lock_low(lock, pass, file_name, line)) { return; /* Success */ @@ -462,8 +460,12 @@ rw_lock_x_lock_func_nowait( there is an exclusive writer and this is the writer thread. */ lock->lock_word -= X_LOCK_DECR; + /* Recursive x-locks must be multiples of X_LOCK_DECR. */ ut_ad(((-lock->lock_word) % X_LOCK_DECR) == 0); + /* Watch for too many recursive locks */ + ut_ad(lock->lock_word < 0); + } else { /* Failure */ return(FALSE); @@ -502,10 +504,10 @@ rw_lock_s_unlock_func( if (rw_lock_lock_word_incr(lock, 1) == 0) { /* wait_ex waiter exists. It may not be asleep, but we signal - anyway. We do not wake other waiters, because they can't - exist without wait_ex waiter and wait_ex waiter goes first.*/ + anyway. 
We do not wake other waiters, because they can't + exist without wait_ex waiter and wait_ex waiter goes first.*/ os_event_set(lock->wait_ex_event); - sync_array_object_signalled(sync_primary_wait_array); + sync_array_object_signalled(); } @@ -517,31 +519,6 @@ rw_lock_s_unlock_func( } /******************************************************************//** -Releases a shared mode lock when we know there are no waiters and none -else will access the lock during the time this function is executed. */ -UNIV_INLINE -void -rw_lock_s_unlock_direct( -/*====================*/ - rw_lock_t* lock) /*!< in/out: rw-lock */ -{ - ut_ad(lock->lock_word < X_LOCK_DECR); - -#ifdef UNIV_SYNC_DEBUG - rw_lock_remove_debug_info(lock, 0, RW_LOCK_SHARED); -#endif - - /* Decrease reader count by incrementing lock_word */ - lock->lock_word++; - - ut_ad(!lock->waiters); - ut_ad(rw_lock_validate(lock)); -#ifdef UNIV_SYNC_PERF_STAT - rw_s_exit_count++; -#endif -} - -/******************************************************************//** Releases an exclusive mode lock. */ UNIV_INLINE void @@ -564,6 +541,8 @@ rw_lock_x_unlock_func( if (lock->lock_word == 0) { /* Last caller in a possible recursive chain. */ lock->recursive = FALSE; + UNIV_MEM_INVALID(&lock->writer_thread, + sizeof lock->writer_thread); } #ifdef UNIV_SYNC_DEBUG @@ -572,12 +551,12 @@ rw_lock_x_unlock_func( if (rw_lock_lock_word_incr(lock, X_LOCK_DECR) == X_LOCK_DECR) { /* Lock is now free. May have to signal read/write waiters. - We do not need to signal wait_ex waiters, since they cannot - exist when there is a writer. */ + We do not need to signal wait_ex waiters, since they cannot + exist when there is a writer. 
*/ if (lock->waiters) { rw_lock_reset_waiter_flag(lock); os_event_set(lock->event); - sync_array_object_signalled(sync_primary_wait_array); + sync_array_object_signalled(); } } @@ -588,38 +567,6 @@ rw_lock_x_unlock_func( #endif } -/******************************************************************//** -Releases an exclusive mode lock when we know there are no waiters, and -none else will access the lock during the time this function is executed. */ -UNIV_INLINE -void -rw_lock_x_unlock_direct( -/*====================*/ - rw_lock_t* lock) /*!< in/out: rw-lock */ -{ - /* Reset the exclusive lock if this thread no longer has an x-mode - lock */ - - ut_ad((lock->lock_word % X_LOCK_DECR) == 0); - -#ifdef UNIV_SYNC_DEBUG - rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX); -#endif - - if (lock->lock_word == 0) { - lock->recursive = FALSE; - } - - lock->lock_word += X_LOCK_DECR; - - ut_ad(!lock->waiters); - ut_ad(rw_lock_validate(lock)); - -#ifdef UNIV_SYNC_PERF_STAT - rw_x_exit_count++; -#endif -} - #ifdef UNIV_PFS_RWLOCK /******************************************************************//** @@ -643,9 +590,7 @@ pfs_rw_lock_create_func( ulint cline) /*!< in: file line where created */ { /* Initialize the rwlock for performance schema */ - lock->pfs_psi = (PSI_server && PFS_IS_INSTRUMENTED(key)) - ? 
PSI_server->init_rwlock(key, lock) - : NULL; + lock->pfs_psi = PSI_CALL(init_rwlock)(key, lock); /* The actual function to initialize an rwlock */ rw_lock_create_func(lock, @@ -656,7 +601,7 @@ pfs_rw_lock_create_func( cmutex_name, # endif /* UNIV_DEBUG */ cfile_name, - cline); + cline); } /******************************************************************//** Performance schema instrumented wrap function for rw_lock_x_lock_func() @@ -672,24 +617,23 @@ pfs_rw_lock_x_lock_func( const char* file_name,/*!< in: file name where lock requested */ ulint line) /*!< in: line where requested */ { - struct PSI_rwlock_locker* locker = NULL; - PSI_rwlock_locker_state state; + if (lock->pfs_psi != NULL) + { + PSI_rwlock_locker* locker; + PSI_rwlock_locker_state state; - /* Record the entry of rw x lock request in performance schema */ - if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) { - locker = PSI_server->get_thread_rwlock_locker( - &state, lock->pfs_psi, PSI_RWLOCK_WRITELOCK); - - if (locker) { - PSI_server->start_rwlock_wrwait(locker, - file_name, line); - } - } + /* Record the entry of rw x lock request in performance schema */ + locker = PSI_CALL(start_rwlock_wrwait)( + &state, lock->pfs_psi, PSI_RWLOCK_WRITELOCK, file_name, line); - rw_lock_x_lock_func(lock, pass, file_name, line); + rw_lock_x_lock_func(lock, pass, file_name, line); - if (locker) { - PSI_server->end_rwlock_wrwait(locker, 0); + if (locker != NULL) + PSI_CALL(end_rwlock_wrwait)(locker, 0); + } + else + { + rw_lock_x_lock_func(lock, pass, file_name, line); } } /******************************************************************//** @@ -707,25 +651,25 @@ pfs_rw_lock_x_lock_func_nowait( requested */ ulint line) /*!< in: line where requested */ { - struct PSI_rwlock_locker* locker = NULL; - PSI_rwlock_locker_state state; ibool ret; - /* Record the entry of rw x lock request in performance schema */ - if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) { - locker = PSI_server->get_thread_rwlock_locker( - &state, 
lock->pfs_psi, PSI_RWLOCK_WRITELOCK); + if (lock->pfs_psi != NULL) + { + PSI_rwlock_locker* locker; + PSI_rwlock_locker_state state; - if (locker) { - PSI_server->start_rwlock_wrwait(locker, - file_name, line); - } - } + /* Record the entry of rw x lock request in performance schema */ + locker = PSI_CALL(start_rwlock_wrwait)( + &state, lock->pfs_psi, PSI_RWLOCK_WRITELOCK, file_name, line); - ret = rw_lock_x_lock_func_nowait(lock, file_name, line); + ret = rw_lock_x_lock_func_nowait(lock, file_name, line); - if (locker) { - PSI_server->end_rwlock_wrwait(locker, 0); + if (locker != NULL) + PSI_CALL(end_rwlock_wrwait)(locker, ret); + } + else + { + ret = rw_lock_x_lock_func_nowait(lock, file_name, line); } return(ret); @@ -740,8 +684,9 @@ pfs_rw_lock_free_func( /*==================*/ rw_lock_t* lock) /*!< in: pointer to rw-lock */ { - if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) { - PSI_server->destroy_rwlock(lock->pfs_psi); + if (lock->pfs_psi != NULL) + { + PSI_CALL(destroy_rwlock)(lock->pfs_psi); lock->pfs_psi = NULL; } @@ -763,24 +708,26 @@ pfs_rw_lock_s_lock_func( requested */ ulint line) /*!< in: line where requested */ { - struct PSI_rwlock_locker* locker = NULL; - PSI_rwlock_locker_state state; - - /* Instrumented to inform we are aquiring a shared rwlock */ - if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) { - locker = PSI_server->get_thread_rwlock_locker( - &state, lock->pfs_psi, PSI_RWLOCK_READLOCK); - if (locker) { - PSI_server->start_rwlock_rdwait(locker, - file_name, line); - } - } + if (lock->pfs_psi != NULL) + { + PSI_rwlock_locker* locker; + PSI_rwlock_locker_state state; - rw_lock_s_lock_func(lock, pass, file_name, line); + /* Instrumented to inform we are aquiring a shared rwlock */ + locker = PSI_CALL(start_rwlock_rdwait)( + &state, lock->pfs_psi, PSI_RWLOCK_READLOCK, file_name, line); - if (locker) { - PSI_server->end_rwlock_rdwait(locker, 0); + rw_lock_s_lock_func(lock, pass, file_name, line); + + if (locker != NULL) + 
PSI_CALL(end_rwlock_rdwait)(locker, 0); + } + else + { + rw_lock_s_lock_func(lock, pass, file_name, line); } + + return; } /******************************************************************//** Performance schema instrumented wrap function for rw_lock_s_lock_func() @@ -798,24 +745,25 @@ pfs_rw_lock_s_lock_low( const char* file_name, /*!< in: file name where lock requested */ ulint line) /*!< in: line where requested */ { - struct PSI_rwlock_locker* locker = NULL; - PSI_rwlock_locker_state state; ibool ret; - /* Instrumented to inform we are aquiring a shared rwlock */ - if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) { - locker = PSI_server->get_thread_rwlock_locker( - &state, lock->pfs_psi, PSI_RWLOCK_READLOCK); - if (locker) { - PSI_server->start_rwlock_rdwait(locker, - file_name, line); - } - } + if (lock->pfs_psi != NULL) + { + PSI_rwlock_locker* locker; + PSI_rwlock_locker_state state; + + /* Instrumented to inform we are aquiring a shared rwlock */ + locker = PSI_CALL(start_rwlock_rdwait)( + &state, lock->pfs_psi, PSI_RWLOCK_READLOCK, file_name, line); - ret = rw_lock_s_lock_low(lock, pass, file_name, line); + ret = rw_lock_s_lock_low(lock, pass, file_name, line); - if (locker) { - PSI_server->end_rwlock_rdwait(locker, 0); + if (locker != NULL) + PSI_CALL(end_rwlock_rdwait)(locker, ret); + } + else + { + ret = rw_lock_s_lock_low(lock, pass, file_name, line); } return(ret); @@ -837,9 +785,8 @@ pfs_rw_lock_x_unlock_func( rw_lock_t* lock) /*!< in/out: rw-lock */ { /* Inform performance schema we are unlocking the lock */ - if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) { - PSI_server->unlock_rwlock(lock->pfs_psi); - } + if (lock->pfs_psi != NULL) + PSI_CALL(unlock_rwlock)(lock->pfs_psi); rw_lock_x_unlock_func( #ifdef UNIV_SYNC_DEBUG @@ -864,9 +811,8 @@ pfs_rw_lock_s_unlock_func( rw_lock_t* lock) /*!< in/out: rw-lock */ { /* Inform performance schema we are unlocking the lock */ - if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) { - 
PSI_server->unlock_rwlock(lock->pfs_psi); - } + if (lock->pfs_psi != NULL) + PSI_CALL(unlock_rwlock)(lock->pfs_psi); rw_lock_s_unlock_func( #ifdef UNIV_SYNC_DEBUG diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h index 9b07c4758c9..1adcf938903 100644 --- a/storage/innobase/include/sync0sync.h +++ b/storage/innobase/include/sync0sync.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -42,7 +42,7 @@ Created 9/5/1995 Heikki Tuuri #include "sync0arr.h" #if defined(UNIV_DEBUG) && !defined(UNIV_HOTBACKUP) -extern my_bool timed_mutexes; +extern "C" my_bool timed_mutexes; #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ #ifdef HAVE_WINDOWS_ATOMICS @@ -53,25 +53,19 @@ typedef byte lock_word_t; #endif #if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK -/* There are mutexes/rwlocks that we want to exclude from -instrumentation even if their corresponding performance schema -define is set. And this PFS_NOT_INSTRUMENTED is used -as the key value to dentify those objects that would -be excluded from instrumentation. */ -# define PFS_NOT_INSTRUMENTED ULINT32_UNDEFINED - -# define PFS_IS_INSTRUMENTED(key) ((key) != PFS_NOT_INSTRUMENTED) /* By default, buffer mutexes and rwlocks will be excluded from instrumentation due to their large number of instances. 
*/ # define PFS_SKIP_BUFFER_MUTEX_RWLOCK +/* By default, event->mutex will also be excluded from instrumentation */ +# define PFS_SKIP_EVENT_MUTEX + #endif /* UNIV_PFS_MUTEX || UNIV_PFS_RWLOCK */ #ifdef UNIV_PFS_MUTEX /* Key defines to register InnoDB mutexes with performance schema */ extern mysql_pfs_key_t autoinc_mutex_key; -extern mysql_pfs_key_t btr_search_enabled_mutex_key; extern mysql_pfs_key_t buffer_block_mutex_key; extern mysql_pfs_key_t buf_pool_mutex_key; extern mysql_pfs_key_t buf_pool_zip_mutex_key; @@ -81,13 +75,19 @@ extern mysql_pfs_key_t dict_sys_mutex_key; extern mysql_pfs_key_t file_format_max_mutex_key; extern mysql_pfs_key_t fil_system_mutex_key; extern mysql_pfs_key_t flush_list_mutex_key; +extern mysql_pfs_key_t fts_bg_threads_mutex_key; +extern mysql_pfs_key_t fts_delete_mutex_key; +extern mysql_pfs_key_t fts_optimize_mutex_key; +extern mysql_pfs_key_t fts_doc_id_mutex_key; extern mysql_pfs_key_t hash_table_mutex_key; extern mysql_pfs_key_t ibuf_bitmap_mutex_key; extern mysql_pfs_key_t ibuf_mutex_key; extern mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key; extern mysql_pfs_key_t log_sys_mutex_key; extern mysql_pfs_key_t log_flush_order_mutex_key; -extern mysql_pfs_key_t kernel_mutex_key; +# ifndef HAVE_ATOMIC_BUILTINS +extern mysql_pfs_key_t server_mutex_key; +# endif /* !HAVE_ATOMIC_BUILTINS */ # ifdef UNIV_MEM_DEBUG extern mysql_pfs_key_t mem_hash_mutex_key; # endif /* UNIV_MEM_DEBUG */ @@ -104,13 +104,25 @@ extern mysql_pfs_key_t rw_lock_mutex_key; extern mysql_pfs_key_t srv_dict_tmpfile_mutex_key; extern mysql_pfs_key_t srv_innodb_monitor_mutex_key; extern mysql_pfs_key_t srv_misc_tmpfile_mutex_key; +extern mysql_pfs_key_t srv_threads_mutex_key; extern mysql_pfs_key_t srv_monitor_file_mutex_key; -extern mysql_pfs_key_t syn_arr_mutex_key; # ifdef UNIV_SYNC_DEBUG extern mysql_pfs_key_t sync_thread_mutex_key; # endif /* UNIV_SYNC_DEBUG */ -extern mysql_pfs_key_t trx_doublewrite_mutex_key; +extern mysql_pfs_key_t buf_dblwr_mutex_key; 
extern mysql_pfs_key_t trx_undo_mutex_key; +extern mysql_pfs_key_t trx_mutex_key; +extern mysql_pfs_key_t lock_sys_mutex_key; +extern mysql_pfs_key_t lock_sys_wait_mutex_key; +extern mysql_pfs_key_t trx_sys_mutex_key; +extern mysql_pfs_key_t srv_sys_mutex_key; +extern mysql_pfs_key_t srv_sys_tasks_mutex_key; +#ifndef HAVE_ATOMIC_BUILTINS +extern mysql_pfs_key_t srv_conc_mutex_key; +#endif /* !HAVE_ATOMIC_BUILTINS */ +extern mysql_pfs_key_t event_os_mutex_key; +extern mysql_pfs_key_t ut_list_mutex_key; +extern mysql_pfs_key_t os_mutex_key; #endif /* UNIV_PFS_MUTEX */ /******************************************************************//** @@ -591,10 +603,23 @@ V File system pages | V -Kernel mutex If a kernel operation needs a file -| page allocation, it must reserve the -| fsp x-latch before acquiring the kernel -| mutex. +lock_sys_wait_mutex Mutex protecting lock timeout data +| +V +lock_sys_mutex Mutex protecting lock_sys_t +| +V +trx_sys->mutex Mutex protecting trx_sys_t +| +V +Threads mutex Background thread scheduling mutex +| +V +query_thr_mutex Mutex protecting query threads +| +V +trx_mutex Mutex protecting trx_t fields +| V Search system mutex | @@ -609,7 +634,8 @@ Any other latch V Memory pool mutex */ -/* Latching order levels */ +/* Latching order levels. If you modify these, you have to also update +sync_thread_add_level(). */ /* User transaction locks are higher than any of the latch levels below: no latches are allowed when a thread goes to wait for a normal table @@ -629,10 +655,11 @@ or row lock! */ trx_i_s_cache_t::last_read_mutex */ #define SYNC_FILE_FORMAT_TAG 1200 /* Used to serialize access to the file format tag */ -#define SYNC_DICT_OPERATION 1001 /* table create, drop, etc. reserve +#define SYNC_DICT_OPERATION 1010 /* table create, drop, etc. 
reserve this in X-mode; implicit or backround operations purge, rollback, foreign key checks reserve this in S-mode */ +#define SYNC_FTS_CACHE 1005 /* FTS cache rwlock */ #define SYNC_DICT 1000 #define SYNC_DICT_AUTOINC_MUTEX 999 #define SYNC_DICT_HEADER 995 @@ -663,14 +690,20 @@ or row lock! */ /*------------------------------------- MySQL query cache mutex */ /*------------------------------------- MySQL binlog mutex */ /*-------------------------------*/ -#define SYNC_KERNEL 300 -#define SYNC_REC_LOCK 299 -#define SYNC_TRX_LOCK_HEAP 298 +#define SYNC_LOCK_WAIT_SYS 300 +#define SYNC_LOCK_SYS 299 +#define SYNC_TRX_SYS 298 +#define SYNC_TRX 297 +#define SYNC_THREADS 295 +#define SYNC_REC_LOCK 294 #define SYNC_TRX_SYS_HEADER 290 #define SYNC_PURGE_QUEUE 200 #define SYNC_LOG 170 #define SYNC_LOG_FLUSH_ORDER 147 #define SYNC_RECV 168 +#define SYNC_FTS_CACHE_INIT 166 /* Used for FTS cache initialization */ +#define SYNC_FTS_BG_THREADS 165 +#define SYNC_FTS_OPTIMIZE 164 // FIXME: is this correct number, test #define SYNC_WORK_QUEUE 162 #define SYNC_SEARCH_SYS 160 /* NOTE that if we have a memory heap that can be extended to the @@ -679,6 +712,7 @@ or row lock! */ can call routines there! Otherwise the level is SYNC_MEM_HASH. */ #define SYNC_BUF_POOL 150 /* Buffer pool mutex */ +#define SYNC_BUF_PAGE_HASH 149 /* buf_pool->page_hash rw_lock */ #define SYNC_BUF_BLOCK 146 /* Block mutex */ #define SYNC_BUF_FLUSH_LIST 145 /* Buffer flush list mutex */ #define SYNC_DOUBLEWRITE 140 @@ -700,7 +734,7 @@ implementation of a mutual exclusion semaphore. */ /** InnoDB mutex */ struct mutex_struct { - os_event_t event; /*!< Used by sync0arr.c for the wait queue */ + os_event_t event; /*!< Used by sync0arr.cc for the wait queue */ volatile lock_word_t lock_word; /*!< lock_word is the target of the atomic test-and-set instruction when atomic operations are enabled. 
*/ @@ -747,11 +781,6 @@ struct mutex_struct { #endif }; -/** The global array of wait cells for implementation of the databases own -mutexes and read-write locks. */ -extern sync_array_t* sync_primary_wait_array;/* Appears here for - debugging purposes only! */ - /** Constant determining how long spin wait is continued before suspending the thread. A value 600 rounds on a 1995 100 MHz Pentium seems to correspond to 20 microseconds. */ @@ -777,6 +806,30 @@ extern ut_list_base_node_t mutex_list; /** Mutex protecting the mutex_list variable */ extern mutex_t mutex_list_mutex; +#ifndef HAVE_ATOMIC_BUILTINS +/**********************************************************//** +Function that uses a mutex to decrement a variable atomically */ +UNIV_INLINE +void +os_atomic_dec_ulint_func( +/*=====================*/ + mutex_t* mutex, /*!< in: mutex guarding the + decrement */ + volatile ulint* var, /*!< in/out: variable to + decrement */ + ulint delta); /*!< in: delta to decrement */ +/**********************************************************//** +Function that uses a mutex to increment a variable atomically */ +UNIV_INLINE +void +os_atomic_inc_ulint_func( +/*=====================*/ + mutex_t* mutex, /*!< in: mutex guarding the + increment */ + volatile ulint* var, /*!< in/out: variable to + increment */ + ulint delta); /*!< in: delta to increment */ +#endif /* !HAVE_ATOMIC_BUILTINS */ #ifndef UNIV_NONINL #include "sync0sync.ic" diff --git a/storage/innobase/include/sync0sync.ic b/storage/innobase/include/sync0sync.ic index eb21f44c65e..746e73ebee7 100644 --- a/storage/innobase/include/sync0sync.ic +++ b/storage/innobase/include/sync0sync.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. 
Portions of this file contain modifications contributed and copyrighted by @@ -18,8 +18,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -94,7 +94,7 @@ mutex_test_and_set( mutex->lock_word = 1; } - return((byte)ret); + return((byte) ret); #endif } @@ -236,22 +236,22 @@ pfs_mutex_enter_func( const char* file_name, /*!< in: file name where locked */ ulint line) /*!< in: line where locked */ { - struct PSI_mutex_locker* locker = NULL; - PSI_mutex_locker_state state; - int result = 0; - - if (UNIV_LIKELY(PSI_server && mutex->pfs_psi)) { - locker = PSI_server->get_thread_mutex_locker( - &state, mutex->pfs_psi, PSI_MUTEX_LOCK); - if (locker) { - PSI_server->start_mutex_wait(locker, file_name, line); - } - } + if (mutex->pfs_psi != NULL) + { + PSI_mutex_locker* locker; + PSI_mutex_locker_state state; + + locker = PSI_CALL(start_mutex_wait)(&state, mutex->pfs_psi, + PSI_MUTEX_LOCK, file_name, line); - mutex_enter_func(mutex, file_name, line); + mutex_enter_func(mutex, file_name, line); - if (locker) { - PSI_server->end_mutex_wait(locker, result); + if (locker != NULL) + PSI_CALL(end_mutex_wait)(locker, 0); + } + else + { + mutex_enter_func(mutex, file_name, line); } } /********************************************************************//** @@ -270,21 +270,23 @@ pfs_mutex_enter_nowait_func( ulint line) /*!< in: line where requested */ { ulint ret; - struct PSI_mutex_locker* locker = NULL; - PSI_mutex_locker_state state; - - if (UNIV_LIKELY(PSI_server && 
mutex->pfs_psi)) { - locker = PSI_server->get_thread_mutex_locker( - &state, mutex->pfs_psi, PSI_MUTEX_TRYLOCK); - if (locker) { - PSI_server->start_mutex_wait(locker, file_name, line); - } - } - ret = mutex_enter_nowait_func(mutex, file_name, line); + if (mutex->pfs_psi != NULL) + { + PSI_mutex_locker* locker; + PSI_mutex_locker_state state; + + locker = PSI_CALL(start_mutex_wait)(&state, mutex->pfs_psi, + PSI_MUTEX_TRYLOCK, file_name, line); + + ret = mutex_enter_nowait_func(mutex, file_name, line); - if (locker) { - PSI_server->end_mutex_wait(locker, ret); + if (locker != NULL) + PSI_CALL(end_mutex_wait)(locker, (int) ret); + } + else + { + ret = mutex_enter_nowait_func(mutex, file_name, line); } return(ret); @@ -300,9 +302,8 @@ pfs_mutex_exit_func( /*================*/ mutex_t* mutex) /*!< in: pointer to mutex */ { - if (UNIV_LIKELY(PSI_server && mutex->pfs_psi)) { - PSI_server->unlock_mutex(mutex->pfs_psi); - } + if (mutex->pfs_psi != NULL) + PSI_CALL(unlock_mutex)(mutex->pfs_psi); mutex_exit_func(mutex); } @@ -328,9 +329,7 @@ pfs_mutex_create_func( const char* cfile_name, /*!< in: file name where created */ ulint cline) /*!< in: file line where created */ { - mutex->pfs_psi = (PSI_server && PFS_IS_INSTRUMENTED(key)) - ? 
PSI_server->init_mutex(key, mutex) - : NULL; + mutex->pfs_psi = PSI_CALL(init_mutex)(key, mutex); mutex_create_func(mutex, # ifdef UNIV_DEBUG @@ -353,8 +352,9 @@ pfs_mutex_free_func( /*================*/ mutex_t* mutex) /*!< in: mutex */ { - if (UNIV_LIKELY(PSI_server && mutex->pfs_psi)) { - PSI_server->destroy_mutex(mutex->pfs_psi); + if (mutex->pfs_psi != NULL) + { + PSI_CALL(destroy_mutex)(mutex->pfs_psi); mutex->pfs_psi = NULL; } @@ -362,3 +362,43 @@ pfs_mutex_free_func( } #endif /* UNIV_PFS_MUTEX */ + +#ifndef HAVE_ATOMIC_BUILTINS +/**********************************************************//** +Function that uses a mutex to decrement a variable atomically */ +UNIV_INLINE +void +os_atomic_dec_ulint_func( +/*=====================*/ + mutex_t* mutex, /*!< in: mutex guarding the dec */ + volatile ulint* var, /*!< in/out: variable to decrement */ + ulint delta) /*!< in: delta to decrement */ +{ + mutex_enter(mutex); + + /* I don't think we will encounter a situation where + this check will not be required. */ + ut_ad(*var >= delta); + + *var -= delta; + + mutex_exit(mutex); +} + +/**********************************************************//** +Function that uses a mutex to increment a variable atomically */ +UNIV_INLINE +void +os_atomic_inc_ulint_func( +/*=====================*/ + mutex_t* mutex, /*!< in: mutex guarding the increment */ + volatile ulint* var, /*!< in/out: variable to increment */ + ulint delta) /*!< in: delta to increment */ +{ + mutex_enter(mutex); + + *var += delta; + + mutex_exit(mutex); +} +#endif /* !HAVE_ATOMIC_BUILTINS */ diff --git a/storage/innobase/include/sync0types.h b/storage/innobase/include/sync0types.h index 1911bbac7fd..679cf6a9074 100644 --- a/storage/innobase/include/sync0types.h +++ b/storage/innobase/include/sync0types.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. 
+Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/trx0i_s.h b/storage/innobase/include/trx0i_s.h index 73896a3cb76..c286fc4d9ae 100644 --- a/storage/innobase/include/trx0i_s.h +++ b/storage/innobase/include/trx0i_s.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -66,13 +66,15 @@ do { \ strncpy(buff, data, constraint); \ buff[constraint] = '\0'; \ \ - field = ha_storage_put_memlim( \ + field = static_cast<const char*>( \ + ha_storage_put_memlim( \ (tcache)->storage, buff, constraint + 1,\ - MAX_ALLOWED_FOR_STORAGE(tcache)); \ + MAX_ALLOWED_FOR_STORAGE(tcache))); \ } else { \ - field = ha_storage_put_str_memlim( \ + field = static_cast<const char*>( \ + ha_storage_put_str_memlim( \ (tcache)->storage, data, \ - MAX_ALLOWED_FOR_STORAGE(tcache)); \ + MAX_ALLOWED_FOR_STORAGE(tcache))); \ } \ } while (0) @@ -173,6 +175,11 @@ struct i_s_trx_row_struct { ulint trx_search_latch_timeout; /*!< search_latch_timeout in trx_struct */ + ulint trx_is_read_only; + /*!< trx_t::read_only */ + ulint trx_is_autocommit_non_locking; + /*!< trx_is_autocommit_non_locking(trx) + */ }; /** This structure represents INFORMATION_SCHEMA.innodb_lock_waits row */ diff --git a/storage/innobase/include/trx0purge.h b/storage/innobase/include/trx0purge.h index 2bd9e64476b..0199083467c 100644 --- a/storage/innobase/include/trx0purge.h +++ b/storage/innobase/include/trx0purge.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -52,17 +52,6 @@ trx_purge_get_log_from_hist( /*========================*/ fil_addr_t node_addr); /*!< in: file address of the history list node of the log */ -/*****************************************************************//** -Checks if trx_id is >= purge_view: then it is guaranteed that its update -undo log still exists in the system. -@return TRUE if is sure that it is preserved, also if the function -returns FALSE, it is possible that the undo log still exists in the -system */ -UNIV_INTERN -ibool -trx_purge_update_undo_must_exist( -/*=============================*/ - trx_id_t trx_id);/*!< in: transaction id */ /********************************************************************//** Creates the global purge system control structure and inits the history mutex. */ @@ -70,7 +59,8 @@ UNIV_INTERN void trx_purge_sys_create( /*=================*/ - ib_bh_t* ib_bh); /*!< in/own: UNDO log min binary heap*/ + ulint n_purge_threads,/*!< in: number of purge threads */ + ib_bh_t* ib_bh); /*!< in/own: UNDO log min binary heap*/ /********************************************************************//** Frees the global purge system control structure. 
*/ UNIV_INTERN @@ -88,26 +78,6 @@ trx_purge_add_update_undo_to_history( page_t* undo_page, /*!< in: update undo log header page, x-latched */ mtr_t* mtr); /*!< in: mtr */ -/********************************************************************//** -Fetches the next undo log record from the history list to purge. It must be -released with the corresponding release function. -@return copy of an undo log record or pointer to trx_purge_dummy_rec, -if the whole undo log can skipped in purge; NULL if none left */ -UNIV_INTERN -trx_undo_rec_t* -trx_purge_fetch_next_rec( -/*=====================*/ - roll_ptr_t* roll_ptr,/*!< out: roll pointer to undo record */ - trx_undo_inf_t** cell, /*!< out: storage cell for the record in the - purge array */ - mem_heap_t* heap); /*!< in: memory heap where copied */ -/*******************************************************************//** -Releases a reserved purge undo record. */ -UNIV_INTERN -void -trx_purge_rec_release( -/*==================*/ - trx_undo_inf_t* cell); /*!< in: storage cell */ /*******************************************************************//** This function runs a purge batch. @return number of undo log pages handled in the batch */ @@ -115,47 +85,92 @@ UNIV_INTERN ulint trx_purge( /*======*/ - ulint limit); /*!< in: the maximum number of records to - purge in one batch */ -/******************************************************************//** -Prints information of the purge system to stderr. */ + ulint n_purge_threads, /*!< in: number of purge tasks to + submit to task queue. */ + ulint limit, /*!< in: the maximum number of + records to purge in one batch */ + bool truncate); /*!< in: truncate history if true */ +/*******************************************************************//** +Stop purge and wait for it to stop, move to PURGE_STATE_STOP. 
*/ UNIV_INTERN void -trx_purge_sys_print(void); -/*======================*/ +trx_purge_stop(void); +/*================*/ +/*******************************************************************//** +Resume purge, move to PURGE_STATE_RUN. */ +UNIV_INTERN +void +trx_purge_run(void); +/*================*/ + +/** Purge states */ +enum purge_state_t { + PURGE_STATE_INIT, /*!< Purge instance created */ + PURGE_STATE_RUN, /*!< Purge should be running */ + PURGE_STATE_STOP, /*!< Purge should be stopped */ + PURGE_STATE_EXIT /*!< Purge has been shutdown */ +}; + +/*******************************************************************//** +Get the purge state. +@return purge state. */ +UNIV_INTERN +purge_state_t +trx_purge_state(void); +/*=================*/ + +/** This is the purge pointer/iterator. We need both the undo no and the +transaction no up to which purge has parsed and applied the records. */ +typedef struct purge_iter_struct { + trx_id_t trx_no; /*!< Purge has advanced past all + transactions whose number is less + than this */ + undo_no_t undo_no; /*!< Purge has advanced past all records + whose undo number is less than this */ +} purge_iter_t; /** The control structure used in the purge operation */ struct trx_purge_struct{ - ulint state; /*!< Purge system state */ sess_t* sess; /*!< System session running the purge query */ trx_t* trx; /*!< System transaction running the - purge - query: this trx is not in the trx list - of the trx system and it never ends */ - que_t* query; /*!< The query graph which will do the - parallelized purge operation */ + purge query: this trx is not in the + trx list of the trx system and it + never ends */ rw_lock_t latch; /*!< The latch protecting the purge - view. A purge operation must acquire - an x-latch here for the instant at which + view. A purge operation must acquire an + x-latch here for the instant at which it changes the purge view: an undo log operation can prevent this by - obtaining an s-latch here. 
*/ + obtaining an s-latch here. It also + protects state and running */ + os_event_t event; /*!< State signal event */ + ulint n_stop; /*!< Counter to track number stops */ + bool running; /*!< true, if purge is active */ + volatile purge_state_t state; /*!< Purge coordinator thread states, + we check this in several places + without holding the latch. */ + que_t* query; /*!< The query graph which will do the + parallelized purge operation */ read_view_t* view; /*!< The purge will not remove undo logs which are >= this view (purge view) */ - ulonglong n_pages_handled;/*!< Approximate number of undo log - pages processed in purge */ - ulonglong handle_limit; /*!< Target of how many pages to get - processed in the current purge */ + volatile ulint n_submitted; /*!< Count of total tasks submitted + to the task queue */ + volatile ulint n_completed; /*!< Count of total tasks completed */ + /*------------------------------*/ /* The following two fields form the 'purge pointer' which advances during a purge, and which is used in history list truncation */ - trx_id_t purge_trx_no; /*!< Purge has advanced past all - transactions whose number is less - than this */ - undo_no_t purge_undo_no; /*!< Purge has advanced past all records - whose undo number is less than this */ + purge_iter_t iter; /* Limit up to which we have read and + parsed the UNDO log records. Not + necessarily purged from the indexes. 
+ Note that this can never be less than + the limit below, we check for this + invariant in trx0purge.cc */ + purge_iter_t limit; /* The 'purge pointer' which advances + during a purge, and which is used in + history list truncation */ /*-----------------------------*/ ibool next_stored; /*!< TRUE if the info of the next record to purge is stored below: if yes, then @@ -174,9 +189,6 @@ struct trx_purge_struct{ the next record to purge belongs */ ulint hdr_offset; /*!< Header byte offset on the page */ /*-----------------------------*/ - trx_undo_arr_t* arr; /*!< Array of transaction numbers and - undo numbers of the undo records - currently under processing in purge */ mem_heap_t* heap; /*!< Temporary storage used during a purge: can be emptied after purge completes */ @@ -187,9 +199,14 @@ struct trx_purge_struct{ mutex_t bh_mutex; /*!< Mutex protecting ib_bh */ }; -#define TRX_PURGE_ON 1 /* purge operation is running */ -#define TRX_STOP_PURGE 2 /* purge operation is stopped, or - it should be stopped */ +/** Info required to purge a record */ +struct trx_purge_rec_struct { + trx_undo_rec_t* undo_rec; /*!< Record to purge */ + roll_ptr_t roll_ptr; /*!< File pointr to UNDO record */ +}; + +typedef struct trx_purge_rec_struct trx_purge_rec_t; + #ifndef UNIV_NONINL #include "trx0purge.ic" #endif diff --git a/storage/innobase/include/trx0purge.ic b/storage/innobase/include/trx0purge.ic index de09e393654..ca9cc1fb894 100644 --- a/storage/innobase/include/trx0purge.ic +++ b/storage/innobase/include/trx0purge.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -41,3 +41,22 @@ trx_purge_get_log_from_hist( return(node_addr); } +#ifdef UNIV_DEBUG +/********************************************************************//** +address of its history list node. +@return TRUE if purge_sys_t::limit <= purge_sys_t::iter*/ +UNIV_INLINE +ibool +trx_purge_check_limit(void) +/*=======================*/ +{ + ut_ad(purge_sys->limit.trx_no <= purge_sys->iter.trx_no); + + if (purge_sys->limit.trx_no == purge_sys->iter.trx_no) { + ut_ad(purge_sys->limit.undo_no <= purge_sys->iter.undo_no); + } + + return(TRUE); +} +#endif /* UNIV_DEBUG */ + diff --git a/storage/innobase/include/trx0rec.h b/storage/innobase/include/trx0rec.h index 477748f6f89..c9fae45dad4 100644 --- a/storage/innobase/include/trx0rec.h +++ b/storage/innobase/include/trx0rec.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -239,32 +239,13 @@ trx_undo_get_undo_rec_low( /*======================*/ roll_ptr_t roll_ptr, /*!< in: roll pointer to record */ mem_heap_t* heap); /*!< in: memory heap where copied */ -/******************************************************************//** -Copies an undo record to heap. - -NOTE: the caller must have latches on the clustered index page and -purge_view. - -@return DB_SUCCESS, or DB_MISSING_HISTORY if the undo log has been -truncated and we cannot fetch the old version */ -UNIV_INTERN -ulint -trx_undo_get_undo_rec( -/*==================*/ - roll_ptr_t roll_ptr, /*!< in: roll pointer to record */ - trx_id_t trx_id, /*!< in: id of the trx that generated - the roll pointer: it points to an - undo log of this transaction */ - trx_undo_rec_t** undo_rec, /*!< out, own: copy of the record */ - mem_heap_t* heap); /*!< in: memory heap where copied */ /*******************************************************************//** -Build a previous version of a clustered index record. This function checks -that the caller has a latch on the index page of the clustered index record -and an s-latch on the purge_view. This guarantees that the stack of versions -is locked. +Build a previous version of a clustered index record. 
The caller must +hold a latch on the index page of the clustered index record, to +guarantee that the stack of versions is locked all the way down to the +purge_sys->view. @return DB_SUCCESS, or DB_MISSING_HISTORY if the previous version is -earlier than purge_view, which means that it may have been removed, -DB_ERROR if corrupted record */ +earlier than purge_view, which means that it may have been removed */ UNIV_INTERN ulint trx_undo_prev_version_build( diff --git a/storage/innobase/include/trx0rec.ic b/storage/innobase/include/trx0rec.ic index 4fc5a7147f9..847c26f03a8 100644 --- a/storage/innobase/include/trx0rec.ic +++ b/storage/innobase/include/trx0rec.ic @@ -108,6 +108,6 @@ trx_undo_rec_copy( len = mach_read_from_2(undo_rec) - ut_align_offset(undo_rec, UNIV_PAGE_SIZE); ut_ad(len < UNIV_PAGE_SIZE); - return(mem_heap_dup(heap, undo_rec, len)); + return((trx_undo_rec_t*) mem_heap_dup(heap, undo_rec, len)); } #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/include/trx0roll.h b/storage/innobase/include/trx0roll.h index 1dee5655c8c..3b724e03830 100644 --- a/storage/innobase/include/trx0roll.h +++ b/storage/innobase/include/trx0roll.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -32,8 +32,6 @@ Created 3/26/1996 Heikki Tuuri #include "mtr0mtr.h" #include "trx0sys.h" -#define trx_roll_free_all_savepoints(s) trx_roll_savepoints_free((s), NULL) - /*******************************************************************//** Determines if this transaction is rolling back an incomplete transaction in crash recovery. @@ -53,12 +51,6 @@ trx_savept_take( /*============*/ trx_t* trx); /*!< in: transaction */ /*******************************************************************//** -Creates an undo number array. */ -UNIV_INTERN -trx_undo_arr_t* -trx_undo_arr_create(void); -/*=====================*/ -/*******************************************************************//** Frees an undo number array. */ UNIV_INTERN void @@ -74,13 +66,6 @@ trx_undo_arr_get_nth_info( /*======================*/ trx_undo_arr_t* arr, /*!< in: undo number array */ ulint n); /*!< in: position */ -/***********************************************************************//** -Tries truncate the undo logs. */ -UNIV_INTERN -void -trx_roll_try_truncate( -/*==================*/ - trx_t* trx); /*!< in/out: transaction */ /********************************************************************//** Pops the topmost record when the two undo logs of a transaction are seen as a single stack of records ordered by their undo numbers. Inserts the @@ -116,19 +101,6 @@ trx_undo_rec_release( /*=================*/ trx_t* trx, /*!< in/out: transaction */ undo_no_t undo_no);/*!< in: undo number */ -/*********************************************************************//** -Starts a rollback operation. 
*/ -UNIV_INTERN -void -trx_rollback( -/*=========*/ - trx_t* trx, /*!< in: transaction */ - trx_sig_t* sig, /*!< in: signal starting the rollback */ - que_thr_t** next_thr);/*!< in/out: next query thread to run; - if the value which is passed in is - a pointer to a NULL pointer, then the - calling function can start running - a new query thread */ /*******************************************************************//** Rollback or clean up any incomplete transactions which were encountered in crash recovery. If the transaction already was @@ -147,38 +119,13 @@ committed, then we clean up a possible insert undo log. If the transaction was not yet committed, then we roll it back. Note: this is done in a background thread. @return a dummy parameter */ -UNIV_INTERN +extern "C" UNIV_INTERN os_thread_ret_t -trx_rollback_or_clean_all_recovered( -/*================================*/ +DECLARE_THREAD(trx_rollback_or_clean_all_recovered)( +/*================================================*/ void* arg __attribute__((unused))); /*!< in: a dummy parameter required by os_thread_create */ -/****************************************************************//** -Finishes a transaction rollback. */ -UNIV_INTERN -void -trx_finish_rollback_off_kernel( -/*===========================*/ - que_t* graph, /*!< in: undo graph which can now be freed */ - trx_t* trx, /*!< in: transaction */ - que_thr_t** next_thr);/*!< in/out: next query thread to run; - if the value which is passed in is - a pointer to a NULL pointer, then the - calling function can start running - a new query thread; if this parameter is - NULL, it is ignored */ -/****************************************************************//** -Builds an undo 'query' graph for a transaction. The actual rollback is -performed by executing this query graph like a query subprocedure call. -The reply about the completion of the rollback will be sent by this -graph. 
-@return own: the query graph */ -UNIV_INTERN -que_t* -trx_roll_graph_build( -/*=================*/ - trx_t* trx); /*!< in: trx handle */ /*********************************************************************//** Creates a rollback command node struct. @return own: rollback node struct */ @@ -202,7 +149,7 @@ UNIV_INTERN int trx_rollback_for_mysql( /*===================*/ - trx_t* trx); /*!< in: transaction handle */ + trx_t* trx); /*!< in/out: transaction */ /*******************************************************************//** Rollback the latest SQL statement for MySQL. @return error code or DB_SUCCESS */ @@ -210,14 +157,14 @@ UNIV_INTERN int trx_rollback_last_sql_stat_for_mysql( /*=================================*/ - trx_t* trx); /*!< in: transaction handle */ + trx_t* trx); /*!< in/out: transaction */ /*******************************************************************//** -Rollback a transaction used in MySQL. +Rollback a transaction to a given savepoint or do a complete rollback. @return error code or DB_SUCCESS */ UNIV_INTERN int -trx_general_rollback_for_mysql( -/*===========================*/ +trx_rollback_to_savepoint( +/*======================*/ trx_t* trx, /*!< in: transaction handle */ trx_savept_t* savept);/*!< in: pointer to savepoint undo number, if partial rollback requested, or NULL for @@ -273,17 +220,7 @@ trx_release_savepoint_for_mysql( const char* savepoint_name); /*!< in: savepoint name */ /*******************************************************************//** -Frees a single savepoint struct. */ -UNIV_INTERN -void -trx_roll_savepoint_free( -/*=====================*/ - trx_t* trx, /*!< in: transaction handle */ - trx_named_savept_t* savep); /*!< in: savepoint to free */ - -/*******************************************************************//** -Frees savepoint structs starting from savep, if savep == NULL then -free all savepoints. */ +Frees savepoint structs starting from savep. 
*/ UNIV_INTERN void trx_roll_savepoints_free( @@ -295,10 +232,10 @@ trx_roll_savepoints_free( /** A cell of trx_undo_arr_struct; used during a rollback and a purge */ struct trx_undo_inf_struct{ + ibool in_use; /*!< true if cell is being used */ trx_id_t trx_no; /*!< transaction number: not defined during a rollback */ undo_no_t undo_no;/*!< undo number of an undo record */ - ibool in_use; /*!< TRUE if the cell is in use */ }; /** During a rollback and a purge, undo numbers of undo records currently being @@ -306,17 +243,18 @@ processed are stored in this array */ struct trx_undo_arr_struct{ ulint n_cells; /*!< number of cells in the array */ - ulint n_used; /*!< number of cells currently in use */ + ulint n_used; /*!< number of cells in use */ trx_undo_inf_t* infos; /*!< the array of undo infos */ mem_heap_t* heap; /*!< memory heap from which allocated */ }; /** Rollback node states */ enum roll_node_state { - ROLL_NODE_SEND = 1, /*!< about to send a rollback signal to - the transaction */ - ROLL_NODE_WAIT /*!< rollback signal sent to the transaction, - waiting for completion */ + ROLL_NODE_NONE = 0, /*!< Unknown state */ + ROLL_NODE_SEND, /*!< about to send a rollback signal to + the transaction */ + ROLL_NODE_WAIT /*!< rollback signal sent to the + transaction, waiting for completion */ }; /** Rollback command node in a query graph */ @@ -328,6 +266,7 @@ struct roll_node_struct{ trx_savept_t savept; /*!< savepoint to which to roll back, in the case of a partial rollback */ + que_thr_t* undo_thr;/*!< undo query graph */ }; /** A savepoint set with SQL's "SAVEPOINT savepoint_id" command */ diff --git a/storage/innobase/include/trx0roll.ic b/storage/innobase/include/trx0roll.ic index 3460832b18c..178e9bb730a 100644 --- a/storage/innobase/include/trx0roll.ic +++ b/storage/innobase/include/trx0roll.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. 
+Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h index 5acde05de3d..66e5449cf57 100644 --- a/storage/innobase/include/trx0rseg.h +++ b/storage/innobase/include/trx0rseg.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -29,6 +29,7 @@ Created 3/26/1996 Heikki Tuuri #include "univ.i" #include "trx0types.h" #include "trx0sys.h" +#include "ut0bh.h" /******************************************************************//** Gets a rollback segment header. @@ -86,11 +87,11 @@ trx_rsegf_undo_find_free( /******************************************************************//** Looks for a rollback segment, based on the rollback segment id. @return rollback segment */ -UNIV_INTERN +UNIV_INLINE trx_rseg_t* trx_rseg_get_on_id( /*===============*/ - ulint id); /*!< in: rollback segment id */ + ulint id); /*!< in: rollback segment id */ /****************************************************************//** Creates a rollback segment header. This function is called only when a new rollback segment is created in the database. @@ -107,30 +108,42 @@ trx_rseg_header_create( mtr_t* mtr); /*!< in: mtr */ /*********************************************************************//** Creates the memory copies for rollback segments and initializes the -rseg list and array in trx_sys at a database startup. */ +rseg array in trx_sys at a database startup. 
*/ UNIV_INTERN void -trx_rseg_list_and_array_init( -/*=========================*/ - trx_sysf_t* sys_header, /*!< in: trx system header */ +trx_rseg_array_init( +/*================*/ + trx_sysf_t* sys_header, /*!< in/out: trx system header */ ib_bh_t* ib_bh, /*!< in: rseg queue */ - mtr_t* mtr); /*!< in: mtr */ - + mtr_t* mtr); /*!< in/out: mtr */ /*************************************************************************** Free's an instance of the rollback segment in memory. */ UNIV_INTERN void trx_rseg_mem_free( /*==============*/ - trx_rseg_t* rseg); /* in, own: instance to free */ + trx_rseg_t* rseg); /*!< in, own: instance to free */ /********************************************************************* Creates a rollback segment. */ UNIV_INTERN trx_rseg_t* -trx_rseg_create(void); -/*==================*/ - +trx_rseg_create( +/*============*/ + ulint space); /*!< in: id of UNDO tablespace */ + +/******************************************************************** +Get the number of unique rollback tablespaces in use except space id 0. +The last space id will be the sentinel value ULINT_UNDEFINED. The array +will be sorted on space id. Note: space_ids should have space for +TRX_SYS_N_RSEGS + 1 elements. +@return number of unique rollback tablespaces in use. */ +UNIV_INTERN +ulint +trx_rseg_get_n_undo_tablespaces( +/*============================*/ + ulint* space_ids); /*!< out: array of space ids of + UNDO tablespaces */ /* Number of undo log slots in a rollback segment file copy */ #define TRX_RSEG_N_SLOTS (UNIV_PAGE_SIZE / 16) @@ -176,16 +189,12 @@ struct trx_rseg_struct{ yet purged log */ ibool last_del_marks; /*!< TRUE if the last not yet purged log needs purging */ - /*--------------------------------------------------------*/ - UT_LIST_NODE_T(trx_rseg_t) rseg_list; - /* the list of the rollback segment - memory objects */ }; /** For prioritising the rollback segments for purge.
*/ struct rseg_queue_struct { - trx_id_t trx_no; /*!< trx_rseg_t::last_trx_no */ - trx_rseg_t* rseg; /*!< Rollback segment */ + trx_id_t trx_no; /*!< trx_rseg_t::last_trx_no */ + trx_rseg_t* rseg; /*!< Rollback segment */ }; typedef struct rseg_queue_struct rseg_queue_t; diff --git a/storage/innobase/include/trx0rseg.ic b/storage/innobase/include/trx0rseg.ic index 5e8d2b41120..30743da9b8c 100644 --- a/storage/innobase/include/trx0rseg.ic +++ b/storage/innobase/include/trx0rseg.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -86,7 +86,7 @@ trx_rsegf_get_nth_undo( ulint n, /*!< in: index of slot */ mtr_t* mtr) /*!< in: mtr */ { - if (UNIV_UNLIKELY(n >= TRX_RSEG_N_SLOTS)) { + if (n >= TRX_RSEG_N_SLOTS) { fprintf(stderr, "InnoDB: Error: trying to get slot %lu of rseg\n", (ulong) n); @@ -108,7 +108,7 @@ trx_rsegf_set_nth_undo( ulint page_no,/*!< in: page number of the undo log segment */ mtr_t* mtr) /*!< in: mtr */ { - if (UNIV_UNLIKELY(n >= TRX_RSEG_N_SLOTS)) { + if (n >= TRX_RSEG_N_SLOTS) { fprintf(stderr, "InnoDB: Error: trying to set slot %lu of rseg\n", (ulong) n); @@ -150,3 +150,18 @@ trx_rsegf_undo_find_free( return(ULINT_UNDEFINED); } + +/******************************************************************//** +Looks for a rollback segment, based on the rollback segment id. +@return rollback segment */ +UNIV_INLINE +trx_rseg_t* +trx_rseg_get_on_id( +/*===============*/ + ulint id) /*!< in: rollback segment id */ +{ + ut_a(id < TRX_SYS_N_RSEGS); + + return(trx_sys->rseg_array[id]); +} + diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h index 3913792d594..a454c682f89 100644 --- a/storage/innobase/include/trx0sys.h +++ b/storage/innobase/include/trx0sys.h @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -41,6 +41,9 @@ Created 3/26/1996 Heikki Tuuri #include "ut0bh.h" #include "read0types.h" #include "page0types.h" +#include "ut0bh.h" + +typedef UT_LIST_BASE_NODE_T(trx_t) trx_list_t; /** In a MySQL replication slave, in crash recovery we store the master log file name and position here. */ @@ -66,53 +69,6 @@ extern ib_int64_t trx_sys_mysql_bin_log_pos; /** The transaction system */ extern trx_sys_t* trx_sys; -/** Doublewrite system */ -extern trx_doublewrite_t* trx_doublewrite; -/** The following is set to TRUE when we are upgrading from pre-4.1 -format data files to the multiple tablespaces format data files */ -extern ibool trx_doublewrite_must_reset_space_ids; -/** Set to TRUE when the doublewrite buffer is being created */ -extern ibool trx_doublewrite_buf_is_being_created; -/** The following is TRUE when we are using the database in the -post-4.1 format, i.e., we have successfully upgraded, or have created -a new database installation */ -extern ibool trx_sys_multiple_tablespace_format; - -/****************************************************************//** -Creates the doublewrite buffer to a new InnoDB installation. The header of the -doublewrite buffer is placed on the trx system header page. */ -UNIV_INTERN -void -trx_sys_create_doublewrite_buf(void); -/*================================*/ -/****************************************************************//** -At a database startup initializes the doublewrite buffer memory structure if -we already have a doublewrite buffer created in the data files. 
If we are -upgrading to an InnoDB version which supports multiple tablespaces, then this -function performs the necessary update operations. If we are in a crash -recovery, this function uses a possible doublewrite buffer to restore -half-written pages in the data files. */ -UNIV_INTERN -void -trx_sys_doublewrite_init_or_restore_pages( -/*======================================*/ - ibool restore_corrupt_pages); /*!< in: TRUE=restore pages */ -/****************************************************************//** -Marks the trx sys header when we have successfully upgraded to the >= 4.1.x -multiple tablespace format. */ -UNIV_INTERN -void -trx_sys_mark_upgraded_to_multiple_tablespaces(void); -/*===============================================*/ -/****************************************************************//** -Determines if a page number is located inside the doublewrite buffer. -@return TRUE if the location is inside the two blocks of the -doublewrite buffer */ -UNIV_INTERN -ibool -trx_doublewrite_page_inside( -/*========================*/ - ulint page_no); /*!< in: page number */ /***************************************************************//** Checks if a page address is the trx sys header page. @return TRUE if trx sys header page */ @@ -124,17 +80,24 @@ trx_sys_hdr_page( ulint page_no);/*!< in: page number */ /*****************************************************************//** Creates and initializes the central memory structures for the transaction -system. This is called when the database is started. */ +system. This is called when the database is started. +@return min binary heap of rsegs to purge */ UNIV_INTERN -void +ib_bh_t* trx_sys_init_at_db_start(void); /*==========================*/ /*****************************************************************//** -Creates and initializes the transaction system at the database creation. */ +Creates the trx_sys instance and initializes ib_bh and mutex. 
*/ UNIV_INTERN void trx_sys_create(void); /*================*/ +/*****************************************************************//** +Creates and initializes the transaction system at the database creation. */ +UNIV_INTERN +void +trx_sys_create_sys_pages(void); +/*==========================*/ /****************************************************************//** Looks for a free slot for a rollback segment in the trx system file copy. @return slot index or ULINT_UNDEFINED if not found */ @@ -152,16 +115,6 @@ trx_sys_get_nth_rseg( /*=================*/ trx_sys_t* sys, /*!< in: trx system */ ulint n); /*!< in: index of slot */ -/***************************************************************//** -Sets the pointer in the nth slot of the rseg array. */ -UNIV_INLINE -void -trx_sys_set_nth_rseg( -/*=================*/ - trx_sys_t* sys, /*!< in: trx system */ - ulint n, /*!< in: index of slot */ - trx_rseg_t* rseg); /*!< in: pointer to rseg object, NULL if slot - not in use */ /**********************************************************************//** Gets a pointer to the transaction system file copy and x-locks its page. @return pointer to system file copy, page x-locked */ @@ -222,6 +175,14 @@ UNIV_INLINE trx_id_t trx_sys_get_new_trx_id(void); /*========================*/ +/*****************************************************************//** +Determines the maximum transaction id. +@return maximum currently allocated trx id; will be stale after the +next call to trx_sys_get_new_trx_id() */ +UNIV_INLINE +trx_id_t +trx_sys_get_max_trx_id(void); +/*========================*/ #endif /* !UNIV_HOTBACKUP */ #ifdef UNIV_DEBUG @@ -251,39 +212,65 @@ trx_read_trx_id( /*============*/ const byte* ptr); /*!< in: pointer to memory from where to read */ /****************************************************************//** -Looks for the trx handle with the given id in trx_list. 
-@return the trx handle or NULL if not found */ +Looks for the trx instance with the given id in the rw trx_list. +The caller must be holding trx_sys->mutex. +@return the trx handle or NULL if not found; +the pointer must not be dereferenced unless lock_sys->mutex was +acquired before calling this function and is still being held */ UNIV_INLINE trx_t* -trx_get_on_id( -/*==========*/ +trx_get_rw_trx_by_id( +/*=================*/ trx_id_t trx_id);/*!< in: trx id to search for */ /****************************************************************//** -Returns the minumum trx id in trx list. This is the smallest id for which -the trx can possibly be active. (But, you must look at the trx->conc_state to +Returns the minimum trx id in rw trx list. This is the smallest id for which +the trx can possibly be active. (But, you must look at the trx->state to find out if the minimum trx id transaction itself is active, or already committed.) @return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */ UNIV_INLINE trx_id_t -trx_list_get_min_trx_id(void); -/*=========================*/ +trx_rw_min_trx_id(void); +/*===================*/ /****************************************************************//** -Checks if a transaction with the given id is active. -@return TRUE if active */ +Checks if a rw transaction with the given id is active. Caller must hold +trx_sys->mutex in shared mode. If the caller is not holding +lock_sys->mutex, the transaction may already have been committed. 
+@return transaction instance if active, or NULL; +the pointer must not be dereferenced unless lock_sys->mutex was +acquired before calling this function and is still being held */ UNIV_INLINE -ibool -trx_is_active( -/*==========*/ - trx_id_t trx_id);/*!< in: trx id of the transaction */ +trx_t* +trx_rw_is_active_low( +/*=================*/ + trx_id_t trx_id, /*!< in: trx id of the transaction */ + ibool* corrupt); /*!< in: NULL or pointer to a flag + that will be set if corrupt */ /****************************************************************//** -Checks that trx is in the trx list. +Checks if a rw transaction with the given id is active. If the caller is +not holding lock_sys->mutex, the transaction may already have been +committed. +@return transaction instance if active, or NULL; +the pointer must not be dereferenced unless lock_sys->mutex was +acquired before calling this function and is still being held */ +UNIV_INLINE +trx_t* +trx_rw_is_active( +/*=============*/ + trx_id_t trx_id, /*!< in: trx id of the transaction */ + ibool* corrupt); /*!< in: NULL or pointer to a flag + that will be set if corrupt */ +#ifdef UNIV_DEBUG +/****************************************************************//** +Checks whether a trx is in one of rw_trx_list or ro_trx_list. @return TRUE if is in */ UNIV_INTERN ibool trx_in_trx_list( /*============*/ - trx_t* in_trx);/*!< in: trx */ + const trx_t* in_trx) /*!< in: transaction */ + __attribute__((nonnull, warn_unused_result)); +#endif /* UNIV_DEBUG */ #if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG /***********************************************************//** Assert that a transaction has been recovered. @@ -344,14 +331,12 @@ UNIV_INTERN void trx_sys_file_format_tag_init(void); /*==============================*/ -#ifndef UNIV_HOTBACKUP /*****************************************************************//** Shutdown/Close the transaction system. 
*/ UNIV_INTERN void trx_sys_close(void); /*===============*/ -#endif /* !UNIV_HOTBACKUP */ /*****************************************************************//** Get the name representation of the file format from its id. @return pointer to the name */ @@ -371,31 +356,30 @@ trx_sys_file_format_max_set( ulint format_id, /*!< in: file format id */ const char** name); /*!< out: max file format name or NULL if not needed. */ -/*****************************************************************//** -Get the name representation of the file format from its id. -@return pointer to the max format name */ +/********************************************************************* +Creates the rollback segments +@return number of rollback segments that are active. */ UNIV_INTERN -const char* -trx_sys_file_format_max_get(void); -/*=============================*/ +ulint +trx_sys_create_rsegs( +/*=================*/ + ulint n_spaces, /*!< number of tablespaces for UNDO logs */ + ulint n_rsegs); /*!< number of rollback segments to create */ /*****************************************************************//** -Check for the max file format tag stored on disk. -@return DB_SUCCESS or error code */ -UNIV_INTERN +Get the number of transaction in the system, independent of their state. +@return count of transactions in trx_sys_t::trx_list */ +UNIV_INLINE ulint -trx_sys_file_format_max_check( -/*==========================*/ - ulint max_format_id); /*!< in: the max format id to check */ -/********************************************************************//** -Update the file format tag in the system tablespace only if the given -format id is greater than the known max id. -@return TRUE if format_id was bigger than the known max id */ +trx_sys_get_n_rw_trx(void); +/*======================*/ + +/********************************************************************* +Check if there are any active (non-prepared) transactions. 
+@return total number of active transactions or 0 if none */ UNIV_INTERN -ibool -trx_sys_file_format_max_upgrade( -/*============================*/ - const char** name, /*!< out: max file format name */ - ulint format_id); /*!< in: file format identifier */ +ulint +trx_sys_any_active_transactions(void); +/*=================================*/ #else /* !UNIV_HOTBACKUP */ /*****************************************************************//** Prints to stderr the MySQL binlog info in the system header if the @@ -432,6 +416,32 @@ trx_sys_read_pertable_file_format_id( datafile */ ulint *format_id); /*!< out: file format of the per-table data file */ +#endif /* !UNIV_HOTBACKUP */ +/*****************************************************************//** +Get the name representation of the file format from its id. +@return pointer to the max format name */ +UNIV_INTERN +const char* +trx_sys_file_format_max_get(void); +/*=============================*/ +/*****************************************************************//** +Check for the max file format tag stored on disk. +@return DB_SUCCESS or error code */ +UNIV_INTERN +ulint +trx_sys_file_format_max_check( +/*==========================*/ + ulint max_format_id); /*!< in: the max format id to check */ +/********************************************************************//** +Update the file format tag in the system tablespace only if the given +format id is greater than the known max id. +@return TRUE if format_id was bigger than the known max id */ +UNIV_INTERN +ibool +trx_sys_file_format_max_upgrade( +/*============================*/ + const char** name, /*!< out: max file format name */ + ulint format_id); /*!< in: file format identifier */ /*****************************************************************//** Get the name representation of the file format from its id. 
@return pointer to the name */ @@ -441,14 +451,14 @@ trx_sys_file_format_id_to_name( /*===========================*/ const ulint id); /*!< in: id of the file format */ -#endif /* !UNIV_HOTBACKUP */ -/********************************************************************* -Creates the rollback segments */ +#ifdef UNIV_DEBUG +/*************************************************************//** +Validate the trx_sys_t::trx_list. */ UNIV_INTERN -void -trx_sys_create_rsegs( -/*=================*/ - ulint n_rsegs); /*!< number of rollback segments to create */ +ibool +trx_sys_validate_trx_list(void); +/*===========================*/ +#endif /* UNIV_DEBUG */ /* The automatically created system rollback segment has this id */ #define TRX_SYS_SYSTEM_RSEG_ID 0 @@ -502,8 +512,8 @@ We must remember this limit in order to keep file compatibility. */ /** Contents of TRX_SYS_MYSQL_LOG_MAGIC_N_FLD */ #define TRX_SYS_MYSQL_LOG_MAGIC_N 873422344 -#if UNIV_PAGE_SIZE < 4096 -# error "UNIV_PAGE_SIZE < 4096" +#if UNIV_PAGE_SIZE_MIN < 4096 +# error "UNIV_PAGE_SIZE_MIN < 4096" #endif /** The offset of the MySQL replication info in the trx system header; this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */ @@ -559,7 +569,7 @@ this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */ /** If this is not yet set to TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N, we must reset the doublewrite buffer, because starting from 4.1.x the space id of a data page is stored into -FIL_PAGE_ARCH_LOG_NO_OR_SPACE_NO. */ +FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. */ #define TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED (24 + FSEG_HEADER_SIZE) /*-------------------------------------------------------------*/ @@ -572,7 +582,6 @@ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_NO. */ #define TRX_SYS_DOUBLEWRITE_BLOCK_SIZE FSP_EXTENT_SIZE /* @} */ -#ifndef UNIV_HOTBACKUP /** File format tag */ /* @{ */ /** The offset of the file format tag on the trx system header page @@ -591,48 +600,50 @@ identifier is added to this 64-bit constant. 
*/ | TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW) /* @} */ -/** Doublewrite control struct */ -struct trx_doublewrite_struct{ - mutex_t mutex; /*!< mutex protecting the first_free field and - write_buf */ - ulint block1; /*!< the page number of the first - doublewrite block (64 pages) */ - ulint block2; /*!< page number of the second block */ - ulint first_free; /*!< first free position in write_buf measured - in units of UNIV_PAGE_SIZE */ - byte* write_buf; /*!< write buffer used in writing to the - doublewrite buffer, aligned to an - address divisible by UNIV_PAGE_SIZE - (which is required by Windows aio) */ - byte* write_buf_unaligned; - /*!< pointer to write_buf, but unaligned */ - buf_page_t** - buf_block_arr; /*!< array to store pointers to the buffer - blocks which have been cached to write_buf */ -}; - -/** The transaction system central memory data structure; protected by the -kernel mutex */ +#ifndef UNIV_HOTBACKUP +/** The transaction system central memory data structure. */ struct trx_sys_struct{ + + mutex_t mutex; /*!< mutex protecting most fields in + this structure except when noted + otherwise */ + ulint n_mysql_trx; /*!< Number of transactions currently + allocated for MySQL */ + ulint n_prepared_trx; /*!< Number of transactions currently + in the XA PREPARED state */ trx_id_t max_trx_id; /*!< The smallest number not yet assigned as a transaction id or transaction number */ - UT_LIST_BASE_NODE_T(trx_t) trx_list; - /*!< List of active and committed in - memory transactions, sorted on trx id, - biggest first */ - UT_LIST_BASE_NODE_T(trx_t) mysql_trx_list; - /*!< List of transactions created - for MySQL */ - UT_LIST_BASE_NODE_T(trx_rseg_t) rseg_list; - /*!< List of rollback segment - objects */ - trx_rseg_t* latest_rseg; /*!< Latest rollback segment in the - round-robin assignment of rollback - segments to transactions */ - trx_rseg_t* rseg_array[TRX_SYS_N_RSEGS]; + trx_list_t rw_trx_list; /*!< List of active and committed in + memory read-write transactions, 
sorted + on trx id, biggest first. Recovered + transactions are always on this list. */ + trx_list_t ro_trx_list; /*!< List of active and committed in + memory read-only transactions, sorted + on trx id, biggest first. NOTE: + The order for read-only transactions + is not necessary. We should exploit + this and increase concurrency during + add/remove. */ + trx_list_t mysql_trx_list; /*!< List of transactions created + for MySQL. All transactions on + ro_trx_list are on mysql_trx_list. The + rw_trx_list can contain system + transactions and recovered transactions + that will not be in the mysql_trx_list. + There can be active non-locking + auto-commit read only transactions that + are on this list but not on ro_trx_list. + mysql_trx_list may additionally contain + transactions that have not yet been + started in InnoDB. */ + trx_rseg_t* const rseg_array[TRX_SYS_N_RSEGS]; /*!< Pointer array to rollback - segments; NULL if slot not in use */ + segments; NULL if slot not in use; + created and destroyed in + single-threaded mode; not protected + by any mutex, because it is read-only + during multi-threaded operation */ ulint rseg_history_len;/*!< Length of the TRX_RSEG_HISTORY list (update undo logs for committed transactions), protected by diff --git a/storage/innobase/include/trx0sys.ic b/storage/innobase/include/trx0sys.ic index 5e702b25325..e097e29b551 100644 --- a/storage/innobase/include/trx0sys.ic +++ b/storage/innobase/include/trx0sys.ic @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -80,28 +80,11 @@ trx_sys_get_nth_rseg( trx_sys_t* sys, /*!< in: trx system */ ulint n) /*!< in: index of slot */ { - ut_ad(mutex_own(&(kernel_mutex))); ut_ad(n < TRX_SYS_N_RSEGS); return(sys->rseg_array[n]); } -/***************************************************************//** -Sets the pointer in the nth slot of the rseg array. */ -UNIV_INLINE -void -trx_sys_set_nth_rseg( -/*=================*/ - trx_sys_t* sys, /*!< in: trx system */ - ulint n, /*!< in: index of slot */ - trx_rseg_t* rseg) /*!< in: pointer to rseg object, NULL if slot - not in use */ -{ - ut_ad(n < TRX_SYS_N_RSEGS); - - sys->rseg_array[n] = rseg; -} - /**********************************************************************//** Gets a pointer to the transaction system header and x-latches its page. @return pointer to system header, page x-latched. 
*/ @@ -137,7 +120,6 @@ trx_sysf_rseg_get_space( ulint i, /*!< in: slot index == rseg id */ mtr_t* mtr) /*!< in: mtr */ { - ut_ad(mutex_own(&(kernel_mutex))); ut_ad(sys_header); ut_ad(i < TRX_SYS_N_RSEGS); @@ -159,7 +141,6 @@ trx_sysf_rseg_get_page_no( mtr_t* mtr) /*!< in: mtr */ { ut_ad(sys_header); - ut_ad(mutex_own(&(kernel_mutex))); ut_ad(i < TRX_SYS_N_RSEGS); return(mtr_read_ulint(sys_header + TRX_SYS_RSEGS @@ -179,7 +160,6 @@ trx_sysf_rseg_set_space( ulint space, /*!< in: space id */ mtr_t* mtr) /*!< in: mtr */ { - ut_ad(mutex_own(&(kernel_mutex))); ut_ad(sys_header); ut_ad(i < TRX_SYS_N_RSEGS); @@ -203,7 +183,6 @@ trx_sysf_rseg_set_page_no( slot is reset to unused */ mtr_t* mtr) /*!< in: mtr */ { - ut_ad(mutex_own(&(kernel_mutex))); ut_ad(sys_header); ut_ad(i < TRX_SYS_N_RSEGS); @@ -251,30 +230,96 @@ trx_read_trx_id( } /****************************************************************//** -Looks for the trx handle with the given id in trx_list. -@return the trx handle or NULL if not found */ +Looks for the trx handle with the given id in rw_trx_list. +The caller must be holding trx_sys->mutex. +@return the trx handle or NULL if not found; +the pointer must not be dereferenced unless lock_sys->mutex was +acquired before calling this function and is still being held */ UNIV_INLINE trx_t* -trx_get_on_id( -/*==========*/ +trx_get_rw_trx_by_id( +/*=================*/ trx_id_t trx_id) /*!< in: trx id to search for */ { - trx_t* trx; + trx_t* trx; + ulint len; + trx_t* first; - ut_ad(mutex_own(&(kernel_mutex))); + ut_ad(mutex_own(&trx_sys->mutex)); - trx = UT_LIST_GET_FIRST(trx_sys->trx_list); + len = UT_LIST_GET_LEN(trx_sys->rw_trx_list); - while (trx != NULL) { - if (trx_id == trx->id) { + if (len == 0) { + return(NULL); + } + + /* Because the list is ordered on trx id in descending order, + we try to speed things up a bit. 
*/ + + trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list); + assert_trx_in_rw_list(trx); + + if (trx_id == trx->id) { + return(trx); + } else if (len == 1 || trx_id > trx->id) { + return(NULL); + } + + first = trx; - return(trx); + trx = UT_LIST_GET_LAST(trx_sys->rw_trx_list); + assert_trx_in_rw_list(trx); + + if (trx_id == trx->id) { + return(trx); + } else if (len == 2 || trx_id < trx->id) { + return(NULL); + } + + /* Search the list from the lower end (tail). */ + if (trx_id < (first->id + trx->id) >> 1) { + for (trx = UT_LIST_GET_PREV(trx_list, trx); + trx != NULL && trx_id > trx->id; + trx = UT_LIST_GET_PREV(trx_list, trx)) { + assert_trx_in_rw_list(trx); } + } else { + for (trx = UT_LIST_GET_NEXT(trx_list, first); + trx != NULL && trx_id < trx->id; + trx = UT_LIST_GET_NEXT(trx_list, trx)) { + assert_trx_in_rw_list(trx); + } + } - trx = UT_LIST_GET_NEXT(trx_list, trx); + return((trx != NULL && trx->id == trx_id) ? trx : NULL); +} + +/****************************************************************//** +Returns the minimum trx id in trx list. This is the smallest id for which +the trx can possibly be active. (But, you must look at the trx->state +to find out if the minimum trx id transaction itself is active, or already +committed.). The caller must be holding the trx_sys_t::mutex in shared mode. 
+@return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */ +UNIV_INLINE +trx_id_t +trx_rw_min_trx_id_low(void) +/*=======================*/ +{ + trx_id_t id; + const trx_t* trx; + + ut_ad(mutex_own(&trx_sys->mutex)); + + trx = UT_LIST_GET_LAST(trx_sys->rw_trx_list); + + if (trx == NULL) { + id = trx_sys->max_trx_id; + } else { + assert_trx_in_rw_list(trx); + id = trx->id; } - return(NULL); + return(id); } #if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG @@ -287,79 +332,109 @@ trx_assert_recovered( /*=================*/ trx_id_t trx_id) /*!< in: transaction identifier */ { - trx_t* trx; + const trx_t* trx; - mutex_enter(&kernel_mutex); - trx = trx_get_on_id(trx_id); - ut_a(trx); + mutex_enter(&trx_sys->mutex); + + trx = trx_get_rw_trx_by_id(trx_id); ut_a(trx->is_recovered); - mutex_exit(&kernel_mutex); + + mutex_exit(&trx_sys->mutex); return(TRUE); } #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ /****************************************************************//** -Returns the minumum trx id in trx list. This is the smallest id for which -the trx can possibly be active. (But, you must look at the trx->conc_state to -find out if the minimum trx id transaction itself is active, or already +Returns the minimum trx id in rw trx list. This is the smallest id for which +the rw trx can possibly be active. (But, you must look at the trx->state +to find out if the minimum trx id transaction itself is active, or already committed.) 
-@return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */ +@return the minimum trx id, or trx_sys->max_trx_id if rw trx list is empty */ UNIV_INLINE trx_id_t -trx_list_get_min_trx_id(void) -/*=========================*/ +trx_rw_min_trx_id(void) +/*===================*/ { - trx_t* trx; + trx_id_t id; - ut_ad(mutex_own(&(kernel_mutex))); + mutex_enter(&trx_sys->mutex); - trx = UT_LIST_GET_LAST(trx_sys->trx_list); + id = trx_rw_min_trx_id_low(); - if (trx == NULL) { + mutex_exit(&trx_sys->mutex); - return(trx_sys->max_trx_id); - } - - return(trx->id); + return(id); } /****************************************************************//** -Checks if a transaction with the given id is active. -@return TRUE if active */ +Checks if a rw transaction with the given id is active. Caller must hold +trx_sys->mutex. If the caller is not holding lock_sys->mutex, the +transaction may already have been committed. +@return transaction instance if active, or NULL; +the pointer must not be dereferenced unless lock_sys->mutex was +acquired before calling this function and is still being held */ UNIV_INLINE -ibool -trx_is_active( -/*==========*/ - trx_id_t trx_id) /*!< in: trx id of the transaction */ +trx_t* +trx_rw_is_active_low( +/*=================*/ + trx_id_t trx_id, /*!< in: trx id of the transaction */ + ibool* corrupt) /*!< in: NULL or pointer to a flag + that will be set if corrupt */ { - trx_t* trx; + trx_t* trx; - ut_ad(mutex_own(&(kernel_mutex))); + ut_ad(mutex_own(&trx_sys->mutex)); - if (trx_id < trx_list_get_min_trx_id()) { + if (trx_id < trx_rw_min_trx_id_low()) { - return(FALSE); - } + trx = NULL; + } else if (trx_id >= trx_sys->max_trx_id) { - if (UNIV_UNLIKELY(trx_id >= trx_sys->max_trx_id)) { + /* There must be corruption: we let the caller handle the + diagnostic prints in this case. 
*/ - /* There must be corruption: we return TRUE because this - function is only called by lock_clust_rec_some_has_impl() - and row_vers_impl_x_locked_off_kernel() and they have - diagnostic prints in this case */ + trx = NULL; + if (corrupt != NULL) { + *corrupt = TRUE; + } + } else { + trx = trx_get_rw_trx_by_id(trx_id); - return(TRUE); + if (trx != NULL + && trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)) { + + trx = NULL; + } } - trx = trx_get_on_id(trx_id); - if (trx && (trx->conc_state == TRX_ACTIVE - || trx->conc_state == TRX_PREPARED)) { + return(trx); +} - return(TRUE); - } +/****************************************************************//** +Checks if a rw transaction with the given id is active. If the caller is +not holding lock_sys->mutex, the transaction may already have been +committed. +@return transaction instance if active, or NULL; +the pointer must not be dereferenced unless lock_sys->mutex was +acquired before calling this function and is still being held */ +UNIV_INLINE +trx_t* +trx_rw_is_active( +/*=============*/ + trx_id_t trx_id, /*!< in: trx id of the transaction */ + ibool* corrupt) /*!< in: NULL or pointer to a flag + that will be set if corrupt */ +{ + trx_t* trx; - return(FALSE); + mutex_enter(&trx_sys->mutex); + + trx = trx_rw_is_active_low(trx_id, corrupt); + + mutex_exit(&trx_sys->mutex); + + return(trx); } /*****************************************************************//** @@ -370,9 +445,7 @@ trx_id_t trx_sys_get_new_trx_id(void) /*========================*/ { - trx_id_t id; - - ut_ad(mutex_own(&kernel_mutex)); + ut_ad(mutex_own(&trx_sys->mutex)); /* VERY important: after the database is started, max_trx_id value is divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the following if @@ -381,14 +454,59 @@ trx_sys_get_new_trx_id(void) Thus trx id values will not overlap when the database is repeatedly started! 
*/ - if ((ulint) trx_sys->max_trx_id % TRX_SYS_TRX_ID_WRITE_MARGIN == 0) { + if (!(trx_sys->max_trx_id % (trx_id_t) TRX_SYS_TRX_ID_WRITE_MARGIN)) { trx_sys_flush_max_trx_id(); } - id = trx_sys->max_trx_id++; + return(trx_sys->max_trx_id++); +} - return(id); +/*****************************************************************//** +Determines the maximum transaction id. +@return maximum currently allocated trx id; will be stale after the +next call to trx_sys_get_new_trx_id() */ +UNIV_INLINE +trx_id_t +trx_sys_get_max_trx_id(void) +/*========================*/ +{ +#if UNIV_WORD_SIZE < DATA_TRX_ID_LEN + trx_id_t max_trx_id; +#endif + + ut_ad(!mutex_own(&trx_sys->mutex)); + +#if UNIV_WORD_SIZE < DATA_TRX_ID_LEN + /* Avoid torn reads. */ + mutex_enter(&trx_sys->mutex); + max_trx_id = trx_sys->max_trx_id; + mutex_exit(&trx_sys->mutex); + return(max_trx_id); +#else + /* Perform a dirty read. Callers should be prepared for stale + values, and we know that the value fits in a machine word, so + that it will be read and written atomically. */ + return(trx_sys->max_trx_id); +#endif } +/*****************************************************************//** +Get the number of transaction in the system, independent of their state. +@return count of transactions in trx_sys_t::rw_trx_list */ +UNIV_INLINE +ulint +trx_sys_get_n_rw_trx(void) +/*======================*/ +{ + ulint n_trx; + + mutex_enter(&trx_sys->mutex); + + n_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list); + + mutex_exit(&trx_sys->mutex); + + return(n_trx); +} #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index 7572c766301..3e6cfc7d0da 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -31,30 +31,25 @@ Created 3/26/1996 Heikki Tuuri #include "dict0types.h" #ifndef UNIV_HOTBACKUP #include "lock0types.h" +#include "log0log.h" #include "usr0types.h" #include "que0types.h" #include "mem0mem.h" #include "read0types.h" #include "trx0xa.h" #include "ut0vec.h" +#include "fts0fts.h" /** Dummy session used currently in MySQL interface */ extern sess_t* trx_dummy_sess; -/** Number of transactions currently allocated for MySQL: protected by -the kernel mutex */ -extern ulint trx_n_mysql_transactions; -/** Number of transactions currently in the XA PREPARED state: protected by -the kernel mutex */ -extern ulint trx_n_prepared; - /********************************************************************//** Releases the search latch if trx has reserved it. */ -UNIV_INTERN +UNIV_INLINE void trx_search_latch_release_if_reserved( /*=================================*/ - trx_t* trx); /*!< in: transaction */ + trx_t* trx); /*!< in: transaction */ /******************************************************************//** Set detailed error message for the transaction. 
*/ UNIV_INTERN @@ -80,15 +75,6 @@ const dict_index_t* trx_get_error_info( /*===============*/ const trx_t* trx); /*!< in: trx object */ -/****************************************************************//** -Creates and initializes a transaction object. -@return own: the transaction */ -UNIV_INTERN -trx_t* -trx_create( -/*=======*/ - sess_t* sess) /*!< in: session */ - __attribute__((nonnull)); /********************************************************************//** Creates a transaction object for MySQL. @return own: transaction object */ @@ -104,11 +90,11 @@ trx_t* trx_allocate_for_background(void); /*=============================*/ /********************************************************************//** -Frees a transaction object. */ +Frees a transaction object of a background operation of the master thread. */ UNIV_INTERN void -trx_free( -/*=====*/ +trx_free_for_background( +/*====================*/ trx_t* trx); /*!< in, own: trx object */ /********************************************************************//** At shutdown, frees a transaction object that is in the PREPARED state. */ @@ -125,13 +111,6 @@ void trx_free_for_mysql( /*===============*/ trx_t* trx); /*!< in, own: trx object */ -/********************************************************************//** -Frees a transaction object of a background operation of the master thread. */ -UNIV_INTERN -void -trx_free_for_background( -/*====================*/ - trx_t* trx); /*!< in, own: trx object */ /****************************************************************//** Creates trx objects for transactions and initializes the trx list of trx_sys at database start. Rollback segment and undo log lists must @@ -142,50 +121,26 @@ UNIV_INTERN void trx_lists_init_at_db_start(void); /*============================*/ -/****************************************************************//** -Starts a new transaction. 
-@return TRUE if success, FALSE if the rollback segment could not -support this many transactions */ -UNIV_INTERN -ibool -trx_start( -/*======*/ - trx_t* trx, /*!< in: transaction */ - ulint rseg_id);/*!< in: rollback segment id; if ULINT_UNDEFINED - is passed, the system chooses the rollback segment - automatically in a round-robin fashion */ -/****************************************************************//** -Starts a new transaction. -@return TRUE */ -UNIV_INTERN -ibool -trx_start_low( -/*==========*/ - trx_t* trx, /*!< in: transaction */ - ulint rseg_id);/*!< in: rollback segment id; if ULINT_UNDEFINED - is passed, the system chooses the rollback segment - automatically in a round-robin fashion */ /*************************************************************//** Starts the transaction if it is not yet started. */ -UNIV_INLINE +UNIV_INTERN void -trx_start_if_not_started( -/*=====================*/ +trx_start_if_not_started_xa( +/*========================*/ trx_t* trx); /*!< in: transaction */ /*************************************************************//** -Starts the transaction if it is not yet started. Assumes we have reserved -the kernel mutex! */ -UNIV_INLINE +Starts the transaction if it is not yet started. */ +UNIV_INTERN void -trx_start_if_not_started_low( -/*=========================*/ +trx_start_if_not_started( +/*=====================*/ trx_t* trx); /*!< in: transaction */ /****************************************************************//** Commits a transaction. */ UNIV_INTERN void -trx_commit_off_kernel( -/*==================*/ +trx_commit( +/*=======*/ trx_t* trx); /*!< in: transaction */ /****************************************************************//** Cleans up a transaction at database startup. 
The cleanup is needed if @@ -203,15 +158,14 @@ UNIV_INTERN ulint trx_commit_for_mysql( /*=================*/ - trx_t* trx); /*!< in: trx handle */ + trx_t* trx); /*!< in/out: transaction */ /**********************************************************************//** -Does the transaction prepare for MySQL. -@return 0 or error number */ +Does the transaction prepare for MySQL. */ UNIV_INTERN -ulint +void trx_prepare_for_mysql( /*==================*/ - trx_t* trx); /*!< in: trx handle */ + trx_t* trx); /*!< in/out: trx handle */ /**********************************************************************//** This function is used to find number of prepared transactions and their transaction objects for a recovery. @@ -225,7 +179,9 @@ trx_recover_for_mysql( /*******************************************************************//** This function is used to find one X/Open XA distributed transaction which is in the prepared state -@return trx or NULL; on match, the trx->xid will be invalidated */ +@return trx or NULL; on match, the trx->xid will be invalidated; +note that the trx may have been committed, unless the caller is +holding lock_sys->mutex */ UNIV_INTERN trx_t * trx_get_trx_by_xid( @@ -257,86 +213,20 @@ read_view_t* trx_assign_read_view( /*=================*/ trx_t* trx); /*!< in: active transaction */ -/***********************************************************//** -The transaction must be in the TRX_QUE_LOCK_WAIT state. Puts it to -the TRX_QUE_RUNNING state and releases query threads which were -waiting for a lock in the wait_thrs list. */ -UNIV_INTERN -void -trx_end_lock_wait( -/*==============*/ - trx_t* trx); /*!< in: transaction */ /****************************************************************//** -Sends a signal to a trx object. */ +Prepares a transaction for commit/rollback. 
*/ UNIV_INTERN void -trx_sig_send( -/*=========*/ - trx_t* trx, /*!< in: trx handle */ - ulint type, /*!< in: signal type */ - ulint sender, /*!< in: TRX_SIG_SELF or - TRX_SIG_OTHER_SESS */ - que_thr_t* receiver_thr, /*!< in: query thread which wants the - reply, or NULL; if type is - TRX_SIG_END_WAIT, this must be NULL */ - trx_savept_t* savept, /*!< in: possible rollback savepoint, or - NULL */ - que_thr_t** next_thr); /*!< in/out: next query thread to run; - if the value which is passed in is - a pointer to a NULL pointer, then the - calling function can start running - a new query thread; if the parameter - is NULL, it is ignored */ -/****************************************************************//** -Send the reply message when a signal in the queue of the trx has -been handled. */ -UNIV_INTERN -void -trx_sig_reply( -/*==========*/ - trx_sig_t* sig, /*!< in: signal */ - que_thr_t** next_thr); /*!< in/out: next query thread to run; - if the value which is passed in is - a pointer to a NULL pointer, then the - calling function can start running - a new query thread */ -/****************************************************************//** -Removes the signal object from a trx signal queue. */ -UNIV_INTERN -void -trx_sig_remove( -/*===========*/ - trx_t* trx, /*!< in: trx handle */ - trx_sig_t* sig); /*!< in, own: signal */ -/****************************************************************//** -Starts handling of a trx signal. */ -UNIV_INTERN -void -trx_sig_start_handle( -/*=================*/ - trx_t* trx, /*!< in: trx handle */ - que_thr_t** next_thr); /*!< in/out: next query thread to run; - if the value which is passed in is - a pointer to a NULL pointer, then the - calling function can start running - a new query thread */ -/****************************************************************//** -Ends signal handling. 
If the session is in the error state, and -trx->graph_before_signal_handling != NULL, returns control to the error -handling routine of the graph (currently only returns the control to the -graph root which then sends an error message to the client). */ -UNIV_INTERN -void -trx_end_signal_handling( -/*====================*/ - trx_t* trx); /*!< in: trx */ +trx_commit_or_rollback_prepare( +/*===========================*/ + trx_t* trx); /*!< in/out: transaction */ /*********************************************************************//** Creates a commit command node struct. @return own: commit node struct */ UNIV_INTERN commit_node_t* -commit_node_create( -/*===============*/ +trx_commit_node_create( +/*===================*/ mem_heap_t* heap); /*!< in: mem heap where created */ /***********************************************************//** Performs an execution step for a commit type node in a query graph. @@ -348,16 +238,53 @@ trx_commit_step( que_thr_t* thr); /*!< in: query thread */ /**********************************************************************//** -Prints info about a transaction to the given file. The caller must own the -kernel mutex. */ +Prints info about a transaction. +Caller must hold trx_sys->mutex. */ +UNIV_INTERN +void +trx_print_low( +/*==========*/ + FILE* f, + /*!< in: output stream */ + const trx_t* trx, + /*!< in: transaction */ + ulint max_query_len, + /*!< in: max query length to print, + or 0 to use the default max length */ + ulint n_lock_rec, + /*!< in: lock_number_of_rows_locked(&trx->lock) */ + ulint n_lock_struct, + /*!< in: length of trx->lock.trx_locks */ + ulint heap_size) + /*!< in: mem_heap_get_size(trx->lock.lock_heap) */ + __attribute__((nonnull)); + +/**********************************************************************//** +Prints info about a transaction. +The caller must hold lock_sys->mutex and trx_sys->mutex. +When possible, use trx_print() instead. 
*/ +UNIV_INTERN +void +trx_print_latched( +/*==============*/ + FILE* f, /*!< in: output stream */ + const trx_t* trx, /*!< in: transaction */ + ulint max_query_len) /*!< in: max query length to print, + or 0 to use the default max length */ + __attribute__((nonnull)); + +/**********************************************************************//** +Prints info about a transaction. +Acquires and releases lock_sys->mutex and trx_sys->mutex. */ UNIV_INTERN void trx_print( /*======*/ - FILE* f, /*!< in: output stream */ - trx_t* trx, /*!< in: transaction */ - ulint max_query_len); /*!< in: max query length to print, or 0 to - use the default max length */ + FILE* f, /*!< in: output stream */ + const trx_t* trx, /*!< in: transaction */ + ulint max_query_len) /*!< in: max query length to print, + or 0 to use the default max length */ + __attribute__((nonnull)); /** Type of data dictionary operation */ typedef enum trx_dict_op { @@ -395,6 +322,37 @@ trx_set_dict_operation( #ifndef UNIV_HOTBACKUP /**********************************************************************//** +Determines if a transaction is in the given state. +The caller must hold trx_sys->mutex, or it must be the thread +that is serving a running transaction. +A running transaction must be in trx_sys->ro_trx_list or trx_sys->rw_trx_list +unless it is a non-locking autocommit read only transaction, which is only +in trx_sys->mysql_trx_list. +@return TRUE if trx->state == state */ +UNIV_INLINE +ibool +trx_state_eq( +/*=========*/ + const trx_t* trx, /*!< in: transaction */ + trx_state_t state) /*!< in: state; + if state != TRX_STATE_NOT_STARTED + asserts that + trx->state != TRX_STATE_NOT_STARTED */ + __attribute__((nonnull, warn_unused_result)); +# ifdef UNIV_DEBUG +/**********************************************************************//** +Asserts that a transaction has been started. +The caller must hold trx_sys->mutex. 
+@return TRUE if started */ +UNIV_INTERN +ibool +trx_assert_started( +/*===============*/ + const trx_t* trx) /*!< in: transaction */ + __attribute__((nonnull, warn_unused_result)); +# endif /* UNIV_DEBUG */ + +/**********************************************************************//** Determines if the currently running transaction has been interrupted. @return TRUE if interrupted */ UNIV_INTERN @@ -419,7 +377,7 @@ Calculates the "weight" of a transaction. The weight of one transaction is estimated as the number of altered rows + the number of locked rows. @param t transaction @return transaction weight */ -#define TRX_WEIGHT(t) ((t)->undo_no + UT_LIST_GET_LEN((t)->trx_locks)) +#define TRX_WEIGHT(t) ((t)->undo_no + UT_LIST_GET_LEN((t)->lock.trx_locks)) /*******************************************************************//** Compares the "weight" (or size) of two transactions. Transactions that @@ -447,61 +405,322 @@ trx_get_que_state_str( /*==================*/ const trx_t* trx); /*!< in: transaction */ -/* Signal to a transaction */ -struct trx_sig_struct{ - unsigned type:3; /*!< signal type */ - unsigned sender:1; /*!< TRX_SIG_SELF or - TRX_SIG_OTHER_SESS */ - que_thr_t* receiver; /*!< non-NULL if the sender of the signal - wants reply after the operation induced - by the signal is completed */ - trx_savept_t savept; /*!< possible rollback savepoint */ - UT_LIST_NODE_T(trx_sig_t) - signals; /*!< queue of pending signals to the - transaction */ - UT_LIST_NODE_T(trx_sig_t) - reply_signals; /*!< list of signals for which the sender - transaction is waiting a reply */ +/*******************************************************************//** +Transactions that aren't started by the MySQL server don't set +the trx_t::mysql_thd field. For such transactions we set the lock +wait timeout to 0 instead of the user configured value that comes +from innodb_lock_wait_timeout via trx_t::mysql_thd. 
+@param trx transaction +@return lock wait timeout in seconds */ +#define trx_lock_wait_timeout_get(trx) \ + ((trx)->mysql_thd != NULL \ + ? thd_lock_wait_timeout((trx)->mysql_thd) \ + : 0) + +/*******************************************************************//** +Determine if the transaction is a non-locking autocommit select +(implied read-only). +@param t transaction +@return true if non-locking autocommit select transaction. */ +#define trx_is_autocommit_non_locking(t) \ +((t)->auto_commit && (t)->will_lock == 0) + +/*******************************************************************//** +Determine if the transaction is a non-locking autocommit select +with an explicit check for the read-only status. +@param t transaction +@return true if non-locking autocommit read-only transaction. */ +#define trx_is_ac_nl_ro(t) \ +((t)->read_only && trx_is_autocommit_non_locking((t))) + +/*******************************************************************//** +Assert that the transaction is in the trx_sys_t::rw_trx_list */ +#define assert_trx_in_rw_list(t) do { \ + ut_ad(!(t)->read_only); \ + assert_trx_in_list(t); \ +} while (0) + +/*******************************************************************//** +Assert that the transaction is either in trx_sys->ro_trx_list or +trx_sys->rw_trx_list but not both and it cannot be an autocommit +non-locking select */ +#define assert_trx_in_list(t) do { \ + ut_ad((t)->in_ro_trx_list == (t)->read_only); \ + ut_ad((t)->in_rw_trx_list == !(t)->read_only); \ + ut_ad(!trx_is_autocommit_non_locking((t))); \ + switch ((t)->state) { \ + case TRX_STATE_PREPARED: \ + ut_a(!(t)->read_only); \ + /* fall through */ \ + case TRX_STATE_ACTIVE: \ + case TRX_STATE_COMMITTED_IN_MEMORY: \ + continue; \ + case TRX_STATE_NOT_STARTED: \ + break; \ + } \ + ut_error; \ +} while (0) + +#ifdef UNIV_DEBUG +/*******************************************************************//** +Assert that an autocommit non-locking slect cannot be in the +ro_trx_list nor the 
rw_trx_list and that it is a read-only transaction. +The tranasction must be in the mysql_trx_list. */ +# define assert_trx_nonlocking_or_in_list(t) \ + do { \ + if (trx_is_autocommit_non_locking(t)) { \ + trx_state_t t_state = (t)->state; \ + ut_ad((t)->read_only); \ + ut_ad(!(t)->is_recovered); \ + ut_ad(!(t)->in_ro_trx_list); \ + ut_ad(!(t)->in_rw_trx_list); \ + ut_ad((t)->in_mysql_trx_list); \ + ut_ad(t_state == TRX_STATE_NOT_STARTED \ + || t_state == TRX_STATE_ACTIVE); \ + } else { \ + assert_trx_in_list(t); \ + } \ + } while (0) +#else /* UNIV_DEBUG */ +/*******************************************************************//** +Assert that an autocommit non-locking slect cannot be in the +ro_trx_list nor the rw_trx_list and that it is a read-only transaction. +The tranasction must be in the mysql_trx_list. */ +# define assert_trx_nonlocking_or_in_list(trx) ((void)0) +#endif /* UNIV_DEBUG */ + +/*******************************************************************//** +Latching protocol for trx_lock_t::que_state. trx_lock_t::que_state +captures the state of the query thread during the execution of a query. +This is different from a transaction state. The query state of a transaction +can be updated asynchronously by other threads. The other threads can be +system threads, like the timeout monitor thread or user threads executing +other queries. Another thing to be mindful of is that there is a delay between +when a query thread is put into LOCK_WAIT state and before it actually starts +waiting. Between these two events it is possible that the query thread is +granted the lock it was waiting for, which implies that the state can be changed +asynchronously. + +All these operations take place within the context of locking. 
Therefore state +changes within the locking code must acquire both the lock mutex and the +trx->mutex when changing trx->lock.que_state to TRX_QUE_LOCK_WAIT or +trx->lock.wait_lock to non-NULL but when the lock wait ends it is sufficient +to only acquire the trx->mutex. +To query the state either of the mutexes is sufficient within the locking +code and no mutex is required when the query thread is no longer waiting. */ + +/** The locks and state of an active transaction. Protected by +lock_sys->mutex, trx->mutex or both. */ +struct trx_lock_struct { + ulint n_active_thrs; /*!< number of active query threads */ + + trx_que_t que_state; /*!< valid when trx->state + == TRX_STATE_ACTIVE: TRX_QUE_RUNNING, + TRX_QUE_LOCK_WAIT, ... */ + + lock_t* wait_lock; /*!< if trx execution state is + TRX_QUE_LOCK_WAIT, this points to + the lock request, otherwise this is + NULL; set to non-NULL when holding + both trx->mutex and lock_sys->mutex; + set to NULL when holding + lock_sys->mutex; readers should + hold lock_sys->mutex, except when + they are holding trx->mutex and + wait_lock==NULL */ + ib_uint64_t deadlock_mark; /*!< A mark field that is initialized + to and checked against lock_mark_counter + by lock_deadlock_recursive(). */ + ibool was_chosen_as_deadlock_victim; + /*!< when the transaction decides to + wait for a lock, it sets this to FALSE; + if another transaction chooses this + transaction as a victim in deadlock + resolution, it sets this to TRUE. + Protected by trx->mutex. */ + time_t wait_started; /*!< lock wait started at this time, + protected only by lock_sys->mutex */ + + que_thr_t* wait_thr; /*!< query thread belonging to this + trx that is in QUE_THR_LOCK_WAIT + state. For threads suspended in a + lock wait, this is protected by + lock_sys->mutex. Otherwise, this may + only be modified by the thread that is + serving the running transaction. 
*/ + + mem_heap_t* lock_heap; /*!< memory heap for trx_locks; + protected by lock_sys->mutex */ + + UT_LIST_BASE_NODE_T(lock_t) + trx_locks; /*!< locks requested + by the transaction; + insertions are protected by trx->mutex + and lock_sys->mutex; removals are + protected by lock_sys->mutex */ + + ib_vector_t* table_locks; /*!< All table locks requested by this + transaction, including AUTOINC locks */ + + ibool cancel; /*!< TRUE if the transaction is being + rolled back either via deadlock + detection or due to lock timeout. The + caller has to acquire the trx_t::mutex + in order to cancel the locks. In + lock_trx_table_locks_remove() we + check for this cancel of a transaction's + locks and avoid reacquiring the trx + mutex to prevent recursive deadlocks. + Protected by both the lock sys mutex + and the trx_t::mutex. */ }; #define TRX_MAGIC_N 91118598 -/* The transaction handle; every session has a trx object which is freed only -when the session is freed; in addition there may be session-less transactions -rolling back after a database recovery */ +/** The transaction handle + +Normally, there is a 1:1 relationship between a transaction handle +(trx) and a session (client connection). One session is associated +with exactly one user transaction. There are some exceptions to this: + +* For DDL operations, a subtransaction is allocated that modifies the +data dictionary tables. Lock waits and deadlocks are prevented by +acquiring the dict_operation_lock before starting the subtransaction +and releasing it after committing the subtransaction. + +* The purge system uses a special transaction that is not associated +with any session. + +* If the system crashed or it was quickly shut down while there were +transactions in the ACTIVE or PREPARED state, these transactions would +no longer be associated with a session when the server is restarted. + +A session may be served by at most one thread at a time. 
The serving +thread of a session might change in some MySQL implementations. +Therefore we do not have os_thread_get_curr_id() assertions in the code. + +Normally, only the thread that is currently associated with a running +transaction may access (read and modify) the trx object, and it may do +so without holding any mutex. The following are exceptions to this: + +* trx_rollback_resurrected() may access resurrected (connectionless) +transactions while the system is already processing new user +transactions. The trx_sys->mutex prevents a race condition between it +and lock_trx_release_locks() [invoked by trx_commit()]. + +* trx_print_low() may access transactions not associated with the current +thread. The caller must be holding trx_sys->mutex and lock_sys->mutex. + +* When a transaction handle is in the trx_sys->mysql_trx_list or +trx_sys->trx_list, some of its fields must not be modified without +holding trx_sys->mutex exclusively. + +* The locking code (in particular, lock_deadlock_recursive() and +lock_rec_convert_impl_to_expl()) will access transactions associated +to other connections. The locks of transactions are protected by +lock_sys->mutex and sometimes by trx->mutex. */ struct trx_struct{ ulint magic_n; + mutex_t mutex; /*!< Mutex protecting the fields + state and lock + (except some fields of lock, which + are protected by lock_sys->mutex) */ + + /** State of the trx from the point of view of concurrency control + and the valid state transitions. 
+ + Possible states: + + TRX_STATE_NOT_STARTED + TRX_STATE_ACTIVE + TRX_STATE_PREPARED + TRX_STATE_COMMITTED_IN_MEMORY (alias below COMMITTED) + + Valid state transitions are: + + Regular transactions: + * NOT_STARTED -> ACTIVE -> COMMITTED -> NOT_STARTED + + Auto-commit non-locking read-only: + * NOT_STARTED -> ACTIVE -> NOT_STARTED + + XA (2PC): + * NOT_STARTED -> ACTIVE -> PREPARED -> COMMITTED -> NOT_STARTED + + Recovered XA: + * NOT_STARTED -> PREPARED -> COMMITTED -> (freed) + + XA (2PC) (shutdown before ROLLBACK or COMMIT): + * NOT_STARTED -> PREPARED -> (freed) + + Latching and various transaction lists membership rules: + + XA (2PC) transactions are always treated as read-write and + non-autocommit. + + Transitions to ACTIVE or NOT_STARTED occur when + !in_rw_trx_list and !in_ro_trx_list (no trx_sys->mutex needed). + + Autocommit non-locking read-only transactions move between states + without holding any mutex. They are !in_rw_trx_list, !in_ro_trx_list. + + When a transaction is NOT_STARTED, it can be in_mysql_trx_list if + it is a user transaction. It cannot be in ro_trx_list or rw_trx_list. + + ACTIVE->PREPARED->COMMITTED is only possible when trx->in_rw_trx_list. + The transition ACTIVE->PREPARED is protected by trx_sys->mutex. + + ACTIVE->COMMITTED is possible when the transaction is in + ro_trx_list or rw_trx_list. + + Transitions to COMMITTED are protected by both lock_sys->mutex + and trx->mutex. + + NOTE: Some of these state change constraints are an overkill, + currently only required for a consistent view for printing stats. + This unnecessarily adds a huge cost for the general case. + + NOTE: In the future we should add read only transactions to the + ro_trx_list the first time they try to acquire a lock ie. by default + we treat all read-only transactions as non-locking. */ + trx_state_t state; + + trx_lock_t lock; /*!< Information about the transaction + locks and state. 
Protected by + trx->mutex or lock_sys->mutex + or both */ + ulint is_recovered; /*!< 0=normal transaction, + 1=recovered, must be rolled back, + protected by trx_sys->mutex when + trx->in_rw_trx_list holds */ + /* These fields are not protected by any mutex. */ const char* op_info; /*!< English text describing the current operation, or an empty string */ - ulint conc_state; /*!< state of the trx from the point - of view of concurrency control: - TRX_ACTIVE, TRX_COMMITTED_IN_MEMORY, - ... */ + ulint isolation_level;/*!< TRX_ISO_REPEATABLE_READ, ... */ + ulint check_foreigns; /*!< normally TRUE, but if the user + wants to suppress foreign key checks, + (in table imports, for example) we + set this FALSE */ /*------------------------------*/ /* MySQL has a transaction coordinator to coordinate two phase - commit between multiple storage engines and the binary log. When - an engine participates in a transaction, it's responsible for - registering itself using the trans_register_ha() API. */ + commit between multiple storage engines and the binary log. When + an engine participates in a transaction, it's responsible for + registering itself using the trans_register_ha() API. */ unsigned is_registered:1;/* This flag is set to 1 after the - transaction has been registered with - the coordinator using the XA API, and - is set to 0 after commit or rollback. */ + transaction has been registered with + the coordinator using the XA API, and + is set to 0 after commit or rollback. */ unsigned active_commit_ordered:1;/* 1 if owns prepare mutex, if this is set to 1 then registered should also be set to 1. This is used in the XA code */ /*------------------------------*/ - ulint isolation_level;/* TRX_ISO_REPEATABLE_READ, ... 
*/ - ulint check_foreigns; /* normally TRUE, but if the user - wants to suppress foreign key checks, - (in table imports, for example) we - set this FALSE */ ulint check_unique_secondary; - /* normally TRUE, but if the user + /*!< normally TRUE, but if the user wants to speed up inserts by suppressing unique key checks for secondary indexes when we decide @@ -518,104 +737,112 @@ struct trx_struct{ defer flush of the logs to disk until after we release the mutex. */ - ulint must_flush_log_later;/* this flag is set to TRUE in - trx_commit_off_kernel() if - flush_log_later was TRUE, and there - were modifications by the transaction; - in that case we must flush the log - in trx_commit_complete_for_mysql() */ + ulint must_flush_log_later;/*!< this flag is set to TRUE in + trx_commit() if flush_log_later was + TRUE, and there were modifications by + the transaction; in that case we must + flush the log in + trx_commit_complete_for_mysql() */ ulint duplicates; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */ ulint has_search_latch; - /* TRUE if this trx has latched the + /*!< TRUE if this trx has latched the search system latch in S-mode */ - ulint deadlock_mark; /*!< a mark field used in deadlock - checking algorithm. */ + ulint search_latch_timeout; + /*!< If we notice that someone is + waiting for our S-lock on the search + latch to be released, we wait in + row0sel.cc for BTR_SEA_TIMEOUT new + searches until we try to keep + the search latch again over + calls from MySQL; this is intended + to reduce contention on the search + latch */ trx_dict_op_t dict_operation; /**< @see enum trx_dict_op */ /* Fields protected by the srv_conc_mutex. */ ulint declared_to_be_inside_innodb; - /* this is TRUE if we have declared + /*!< this is TRUE if we have declared this transaction in srv_conc_enter_innodb to be inside the InnoDB engine */ - - /* Fields protected by dict_operation_lock. The very latch - it is used to track. 
*/ + ulint n_tickets_to_enter_innodb; + /*!< this can be > 0 only when + declared_to_... is TRUE; when we come + to srv_conc_innodb_enter, if the value + here is > 0, we decrement this by 1 */ ulint dict_operation_lock_mode; /*!< 0, RW_S_LATCH, or RW_X_LATCH: the latch mode trx currently holds - on dict_operation_lock */ + on dict_operation_lock. Protected + by dict_operation_lock. */ + + trx_id_t no; /*!< transaction serialization number: + max trx id shortly before the + transaction is moved to + COMMITTED_IN_MEMORY state. + Protected by trx_sys_t::mutex + when trx->in_rw_trx_list. Initially + set to IB_ULONGLONG_MAX. */ - /* All the next fields are protected by the kernel mutex, except the - undo logs which are protected by undo_mutex */ - ulint is_purge; /*!< 0=user transaction, 1=purge */ - ulint is_recovered; /*!< 0=normal transaction, - 1=recovered, must be rolled back */ - ulint que_state; /*!< valid when conc_state - == TRX_ACTIVE: TRX_QUE_RUNNING, - TRX_QUE_LOCK_WAIT, ... */ - ulint handling_signals;/* this is TRUE as long as the trx - is handling signals */ time_t start_time; /*!< time the trx object was created or the state last time became - TRX_ACTIVE */ + TRX_STATE_ACTIVE */ trx_id_t id; /*!< transaction id */ XID xid; /*!< X/Open XA transaction identification to identify a transaction branch */ - trx_id_t no; /*!< transaction serialization number == - max trx id when the transaction is - moved to COMMITTED_IN_MEMORY state */ - ib_uint64_t commit_lsn; /*!< lsn at the time of the commit */ + lsn_t commit_lsn; /*!< lsn at the time of the commit */ table_id_t table_id; /*!< Table to drop iff dict_operation is TRUE, or 0. 
*/ /*------------------------------*/ void* mysql_thd; /*!< MySQL thread handle corresponding to this trx, or NULL */ const char* mysql_log_file_name; - /* if MySQL binlog is used, this field + /*!< if MySQL binlog is used, this field contains a pointer to the latest file name; this is NULL if binlog is not used */ - ib_int64_t mysql_log_offset;/* if MySQL binlog is used, this field - contains the end offset of the binlog - entry */ + ib_int64_t mysql_log_offset; + /*!< if MySQL binlog is used, this + field contains the end offset of the + binlog entry */ /*------------------------------*/ - ulint n_mysql_tables_in_use; /* number of Innobase tables + ulint n_mysql_tables_in_use; /*!< number of Innobase tables used in the processing of the current SQL statement in MySQL */ ulint mysql_n_tables_locked; - /* how many tables the current SQL + /*!< how many tables the current SQL statement uses, except those in consistent read */ - ulint search_latch_timeout; - /* If we notice that someone is - waiting for our S-lock on the search - latch to be released, we wait in - row0sel.c for BTR_SEA_TIMEOUT new - searches until we try to keep - the search latch again over - calls from MySQL; this is intended - to reduce contention on the search - latch */ - /*------------------------------*/ - ulint n_tickets_to_enter_innodb; - /* this can be > 0 only when - declared_to_... is TRUE; when we come - to srv_conc_innodb_enter, if the value - here is > 0, we decrement this by 1 */ /*------------------------------*/ UT_LIST_NODE_T(trx_t) - trx_list; /*!< list of transactions */ + trx_list; /*!< list of transactions; + protected by trx_sys->mutex. + The same node is used for both + trx_sys_t::ro_trx_list and + trx_sys_t::rw_trx_list */ +#ifdef UNIV_DEBUG + /** The following two fields are mutually exclusive. 
*/ + /* @{ */ + + ibool in_ro_trx_list; /*!< TRUE if in trx_sys->ro_trx_list */ + ibool in_rw_trx_list; /*!< TRUE if in trx_sys->rw_trx_list */ + /* @} */ +#endif /* UNIV_DEBUG */ UT_LIST_NODE_T(trx_t) mysql_trx_list; /*!< list of transactions created for - MySQL */ + MySQL; protected by trx_sys->mutex */ +#ifdef UNIV_DEBUG + ibool in_mysql_trx_list; + /*!< TRUE if in + trx_sys->mysql_trx_list */ +#endif /* UNIV_DEBUG */ /*------------------------------*/ - ulint error_state; /*!< 0 if no error, otherwise error + enum db_err error_state; /*!< 0 if no error, otherwise error number; NOTE That ONLY the thread doing the transaction is allowed to set this field: this is NOT protected - by the kernel mutex */ + by any mutex */ const dict_index_t*error_info; /*!< if the error number indicates a duplicate key error, a pointer to the problematic index is stored here */ @@ -629,50 +856,11 @@ struct trx_struct{ survive over a transaction commit, if it is a stored procedure with a COMMIT WORK statement, for instance */ - ulint n_active_thrs; /*!< number of active query threads */ - que_t* graph_before_signal_handling; - /* value of graph when signal handling - for this trx started: this is used to - return control to the original query - graph for error processing */ - trx_sig_t sig; /*!< one signal object can be allocated - in this space, avoiding mem_alloc */ - UT_LIST_BASE_NODE_T(trx_sig_t) - signals; /*!< queue of processed or pending - signals to the trx */ - UT_LIST_BASE_NODE_T(trx_sig_t) - reply_signals; /*!< list of signals sent by the query - threads of this trx for which a thread - is waiting for a reply; if this trx is - killed, the reply requests in the list - must be canceled */ - /*------------------------------*/ - lock_t* wait_lock; /*!< if trx execution state is - TRX_QUE_LOCK_WAIT, this points to - the lock request, otherwise this is - NULL */ - ibool was_chosen_as_deadlock_victim; - /* when the transaction decides to wait - for a lock, it sets this to FALSE; 
- if another transaction chooses this - transaction as a victim in deadlock - resolution, it sets this to TRUE */ - time_t wait_started; /*!< lock wait started at this time */ - UT_LIST_BASE_NODE_T(que_thr_t) - wait_thrs; /*!< query threads belonging to this - trx that are in the QUE_THR_LOCK_WAIT - state */ - /*------------------------------*/ - mem_heap_t* lock_heap; /*!< memory heap for the locks of the - transaction */ - UT_LIST_BASE_NODE_T(lock_t) - trx_locks; /*!< locks reserved by the transaction */ - /*------------------------------*/ mem_heap_t* global_read_view_heap; - /* memory heap for the global read + /*!< memory heap for the global read view */ read_view_t* global_read_view; - /* consistent read view associated + /*!< consistent read view associated to a transaction or NULL */ read_view_t* read_view; /*!< consistent read view used in the transaction or NULL, this read view @@ -699,7 +887,7 @@ struct trx_struct{ the number of modified/inserted rows in a transaction */ trx_savept_t last_sql_stat_start; - /* undo_no when the last sql statement + /*!< undo_no when the last sql statement was started: in case of an error, trx is rolled back down to this undo number; see note at undo_mutex! */ @@ -725,29 +913,32 @@ struct trx_struct{ transaction. Note that these are also in the lock list trx_locks. This vector needs to be freed explicitly - when the trx_t instance is desrtoyed */ + when the trx instance is destroyed. + Protected by lock_sys->mutex. */ + /*------------------------------*/ + ibool read_only; /*!< TRUE if transaction is flagged + as a READ-ONLY transaction. + if !auto_commit || will_lock > 0 + then it will added to the list + trx_sys_t::ro_trx_list. A read only + transaction will not be assigned an + UNDO log. Non-locking auto-commit + read-only transaction will not be on + either list. */ + ibool auto_commit; /*!< TRUE if it is an autocommit */ + ulint will_lock; /*!< Will acquire some locks. 
Increment + each time we determine that a lock will + be acquired by the MySQL layer. */ + /*------------------------------*/ + fts_trx_t* fts_trx; /* FTS information, or NULL if + transaction hasn't modified tables + with FTS indexes (yet). */ + doc_id_t fts_next_doc_id;/* The document id used for updates */ /*------------------------------*/ char detailed_error[256]; /*!< detailed error message for last error, or empty. */ }; -#define TRX_MAX_N_THREADS 32 /* maximum number of - concurrent threads running a - single operation of a - transaction, e.g., a parallel - query */ -/* Transaction concurrency states (trx->conc_state) */ -#define TRX_NOT_STARTED 0 -#define TRX_ACTIVE 1 -#define TRX_COMMITTED_IN_MEMORY 2 -#define TRX_PREPARED 3 /* Support for 2PC/XA */ - -/* Transaction execution states when trx->conc_state == TRX_ACTIVE */ -#define TRX_QUE_RUNNING 0 /* transaction is running */ -#define TRX_QUE_LOCK_WAIT 1 /* transaction is waiting for a lock */ -#define TRX_QUE_ROLLING_BACK 2 /* transaction is rolling back */ -#define TRX_QUE_COMMITTING 3 /* transaction is committing */ - /* Transaction isolation levels (trx->isolation_level) */ #define TRX_ISO_READ_UNCOMMITTED 0 /* dirty read: non-locking SELECTs are performed so that @@ -794,7 +985,6 @@ Multiple flags can be combined with bitwise OR. */ #define TRX_SIG_TOTAL_ROLLBACK 1 #define TRX_SIG_ROLLBACK_TO_SAVEPT 2 #define TRX_SIG_COMMIT 3 -#define TRX_SIG_ERROR_OCCURRED 4 #define TRX_SIG_BREAK_EXECUTION 5 /* Sender types of a signal */ @@ -820,6 +1010,36 @@ struct commit_node_struct{ }; +/** Test if trx->mutex is owned. */ +#define trx_mutex_own(t) mutex_own(&t->mutex) + +/** Acquire the trx->mutex. */ +#define trx_mutex_enter(t) do { \ + mutex_enter(&t->mutex); \ +} while (0) + +/** Release the trx->mutex. 
*/ +#define trx_mutex_exit(t) do { \ + mutex_exit(&t->mutex); \ +} while (0) + +/** @brief The latch protecting the adaptive search system + +This latch protects the +(1) hash index; +(2) columns of a record to which we have a pointer in the hash index; + +but does NOT protect: + +(3) next record offset field in a record; +(4) next or previous records on the same page. + +Bear in mind (3) and (4) when using the hash index. +*/ +extern rw_lock_t* btr_search_latch_temp; + +/** The latch protecting the adaptive search system */ +#define btr_search_latch (*btr_search_latch_temp) #ifndef UNIV_NONINL #include "trx0trx.ic" diff --git a/storage/innobase/include/trx0trx.ic b/storage/innobase/include/trx0trx.ic index 4a1d3bcde0b..ceeb121ab70 100644 --- a/storage/innobase/include/trx0trx.ic +++ b/storage/innobase/include/trx0trx.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -23,37 +23,48 @@ The transaction Created 3/26/1996 Heikki Tuuri *******************************************************/ -/*************************************************************//** -Starts the transaction if it is not yet started. */ -UNIV_INLINE -void -trx_start_if_not_started( -/*=====================*/ - trx_t* trx) /*!< in: transaction */ -{ - ut_ad(trx->conc_state != TRX_COMMITTED_IN_MEMORY); - - if (trx->conc_state == TRX_NOT_STARTED) { - - trx_start(trx, ULINT_UNDEFINED); - } -} - -/*************************************************************//** -Starts the transaction if it is not yet started. Assumes we have reserved -the kernel mutex! */ +/**********************************************************************//** +Determines if a transaction is in the given state. +The caller must hold trx_sys->mutex, or it must be the thread +that is serving a running transaction. +A running transaction must be in trx_sys->ro_trx_list or trx_sys->rw_trx_list +unless it is a non-locking autocommit read only transaction, which is only +in trx_sys->mysql_trx_list. 
+@return TRUE if trx->state == state */ UNIV_INLINE -void -trx_start_if_not_started_low( -/*=========================*/ - trx_t* trx) /*!< in: transaction */ +ibool +trx_state_eq( +/*=========*/ + const trx_t* trx, /*!< in: transaction */ + trx_state_t state) /*!< in: state; + if state != TRX_STATE_NOT_STARTED + asserts that + trx->state != TRX_STATE_NOT_STARTED */ { - ut_ad(trx->conc_state != TRX_COMMITTED_IN_MEMORY); - - if (trx->conc_state == TRX_NOT_STARTED) { - - trx_start_low(trx, ULINT_UNDEFINED); +#ifdef UNIV_DEBUG + switch (trx->state) { + case TRX_STATE_PREPARED: + assert_trx_in_rw_list(trx); + return(trx->state == state); + + case TRX_STATE_ACTIVE: + assert_trx_nonlocking_or_in_list(trx); + return(state == trx->state); + + case TRX_STATE_COMMITTED_IN_MEMORY: + assert_trx_in_list(trx); + return(state == trx->state); + + case TRX_STATE_NOT_STARTED: + /* This state is not allowed for running transactions. */ + ut_a(state == TRX_STATE_NOT_STARTED); + ut_ad(!trx->in_rw_trx_list); + ut_ad(!trx->in_ro_trx_list); + return(state == trx->state); } + ut_error; +#endif /* UNIV_DEBUG */ + return(trx->state == state); } /****************************************************************//** @@ -79,7 +90,7 @@ trx_get_que_state_str( const trx_t* trx) /*!< in: transaction */ { /* be sure to adjust TRX_QUE_STATE_STR_MAX_LEN if you change this */ - switch (trx->que_state) { + switch (trx->lock.que_state) { case TRX_QUE_RUNNING: return("RUNNING"); case TRX_QUE_LOCK_WAIT: @@ -113,7 +124,7 @@ trx_get_dict_operation( } ut_error; #endif /* UNIV_DEBUG */ - return((enum trx_dict_op) UNIV_EXPECT(op, TRX_DICT_OP_NONE)); + return((enum trx_dict_op) op); } /**********************************************************************//** Flag a transaction a dictionary operation. */ @@ -150,3 +161,19 @@ ok: trx->dict_operation = op; } + +/********************************************************************//** +Releases the search latch if trx has reserved it. 
*/ +UNIV_INLINE +void +trx_search_latch_release_if_reserved( +/*=================================*/ + trx_t* trx) /*!< in: transaction */ +{ + if (trx->has_search_latch) { + rw_lock_s_unlock(&btr_search_latch); + + trx->has_search_latch = FALSE; + } +} + diff --git a/storage/innobase/include/trx0types.h b/storage/innobase/include/trx0types.h index a4115b5aca7..650d5878e64 100644 --- a/storage/innobase/include/trx0types.h +++ b/storage/innobase/include/trx0types.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -29,20 +29,37 @@ Created 3/26/1996 Heikki Tuuri #include "ut0byte.h" /** printf(3) format used for printing DB_TRX_ID and other system fields */ -#define TRX_ID_FMT "%llX" +#define TRX_ID_FMT IB_ID_FMT /** maximum length that a formatted trx_t::id could take, not including the terminating NUL character. 
*/ #define TRX_ID_MAX_LEN 17 +/** Transaction execution states when trx->state == TRX_STATE_ACTIVE */ +enum trx_que_enum { + TRX_QUE_RUNNING, /*!< transaction is running */ + TRX_QUE_LOCK_WAIT, /*!< transaction is waiting for + a lock */ + TRX_QUE_ROLLING_BACK, /*!< transaction is rolling back */ + TRX_QUE_COMMITTING /*!< transaction is committing */ +}; + +/** Transaction states (trx_t::state) */ +enum trx_state_enum { + TRX_STATE_NOT_STARTED, + TRX_STATE_ACTIVE, + TRX_STATE_PREPARED, /* Support for 2PC/XA */ + TRX_STATE_COMMITTED_IN_MEMORY +}; + /** Memory objects */ /* @{ */ /** Transaction */ typedef struct trx_struct trx_t; +/** The locks and state of an active transaction */ +typedef struct trx_lock_struct trx_lock_t; /** Transaction system */ typedef struct trx_sys_struct trx_sys_t; -/** Doublewrite information */ -typedef struct trx_doublewrite_struct trx_doublewrite_t; /** Signal */ typedef struct trx_sig_struct trx_sig_t; /** Rollback segment */ @@ -61,6 +78,10 @@ typedef struct roll_node_struct roll_node_t; typedef struct commit_node_struct commit_node_t; /** SAVEPOINT command node in a query graph */ typedef struct trx_named_savept_struct trx_named_savept_t; +/** Transaction concurrency state */ +typedef enum trx_state_enum trx_state_t; +/** Transaction query thread state */ +typedef enum trx_que_enum trx_que_t; /* @} */ /** Rollback contexts */ diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h index 4a1e40af505..ed2ce66bbb6 100644 --- a/storage/innobase/include/trx0undo.h +++ b/storage/innobase/include/trx0undo.h @@ -282,7 +282,7 @@ trx_undo_lists_init( Assigns an undo log for a transaction. A new undo log is created or a cached undo log reused. 
@return DB_SUCCESS if undo log assign successful, possible error codes -are: DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE +are: DB_TOO_MANY_CONCURRENT_TRXS DB_OUT_OF_FILE_SPACE DB_READ_ONLY DB_OUT_OF_MEMORY */ UNIV_INTERN ulint @@ -412,8 +412,8 @@ struct trx_undo_struct{ TRX_UNDO_UPDATE */ ulint state; /*!< state of the corresponding undo log segment */ - ibool del_marks; /*!< relevant only in an update undo log: - this is TRUE if the transaction may + ibool del_marks; /*!< relevant only in an update undo + log: this is TRUE if the transaction may have delete marked records, because of a delete of a row or an update of an indexed field; purge is then @@ -435,8 +435,8 @@ struct trx_undo_struct{ in bytes, or 0 for uncompressed */ ulint hdr_page_no; /*!< page number of the header page in the undo log */ - ulint hdr_offset; /*!< header offset of the undo log on the - page */ + ulint hdr_offset; /*!< header offset of the undo log on + the page */ ulint last_page_no; /*!< page number of the last page in the undo log; this may differ from top_page_no during a rollback */ @@ -582,8 +582,8 @@ quite a large overhead. */ #define TRX_UNDO_XA_XID (TRX_UNDO_XA_BQUAL_LEN + 4) /*--------------------------------------------------------------*/ #define TRX_UNDO_LOG_XA_HDR_SIZE (TRX_UNDO_XA_XID + XIDDATASIZE) - /*!< Total size of the undo log header - with the XA XID */ + /*!< Total size of the undo log header + with the XA XID */ /* @} */ #ifndef UNIV_NONINL diff --git a/storage/innobase/include/trx0undo.ic b/storage/innobase/include/trx0undo.ic index b81330f7f8b..4b38e63297c 100644 --- a/storage/innobase/include/trx0undo.ic +++ b/storage/innobase/include/trx0undo.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/trx0xa.h b/storage/innobase/include/trx0xa.h index e0dd8a1af5b..7caddfb7ba4 100644 --- a/storage/innobase/include/trx0xa.h +++ b/storage/innobase/include/trx0xa.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i index ce7181e7bd5..60eb1fede91 100644 --- a/storage/innobase/include/univ.i +++ b/storage/innobase/include/univ.i @@ -1,8 +1,7 @@ /***************************************************************************** -Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. -Copyright (c) 2009, Sun Microsystems, Inc. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -10,12 +9,6 @@ briefly in the InnoDB documentation. The contributions by Google are incorporated with their permission, and subject to the conditions contained in the file COPYING.Google. -Portions of this file contain modifications contributed and copyrighted by -Sun Microsystems, Inc. Those modifications are gratefully acknowledged and -are described briefly in the InnoDB documentation. The contributions by -Sun Microsystems are incorporated with their permission, and subject to the -conditions contained in the file COPYING.Sun_Microsystems. - This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; version 2 of the License. @@ -25,8 +18,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -50,8 +43,8 @@ Created 1/20/1994 Heikki Tuuri #define IB_TO_STR(s) _IB_TO_STR(s) #define INNODB_VERSION_MAJOR 1 -#define INNODB_VERSION_MINOR 1 -#define INNODB_VERSION_BUGFIX 8 +#define INNODB_VERSION_MINOR 2 +#define INNODB_VERSION_BUGFIX MYSQL_VERSION_PATCH /* The following is the InnoDB version as shown in SELECT plugin_version FROM information_schema.plugins; @@ -68,8 +61,8 @@ component, i.e. we show M.N.P as M.N */ IB_TO_STR(INNODB_VERSION_BUGFIX) #define REFMAN "http://dev.mysql.com/doc/refman/" \ - IB_TO_STR(MYSQL_MAJOR_VERSION) "." \ - IB_TO_STR(MYSQL_MINOR_VERSION) "/en/" + IB_TO_STR(MYSQL_VERSION_MAJOR) "." \ + IB_TO_STR(MYSQL_VERSION_MINOR) "/en/" #ifdef MYSQL_DYNAMIC_PLUGIN /* In the dynamic plugin, redefine some externally visible symbols @@ -103,10 +96,10 @@ if we are compiling on Windows. */ # include <my_pthread.h> #endif /* UNIV_HOTBACKUP */ -/* Include <sys/stat.h> to get S_I... macros defined for os0file.c */ +/* Include <sys/stat.h> to get S_I... macros defined for os0file.cc */ # include <sys/stat.h> # if !defined(__WIN__) -# include <sys/mman.h> /* mmap() for os0proc.c */ +# include <sys/mman.h> /* mmap() for os0proc.cc */ # endif /* Include the header file generated by GNU autoconf */ @@ -123,21 +116,21 @@ if we are compiling on Windows. 
*/ /* We only try to do explicit inlining of functions with gcc and Sun Studio */ -# if !defined(__GNUC__) && !(defined(__SUNPRO_C) || defined(__SUNPRO_CC)) -# undef UNIV_MUST_NOT_INLINE /* Remove compiler warning */ -# define UNIV_MUST_NOT_INLINE -# endif - # ifdef HAVE_PREAD # define HAVE_PWRITE # endif #endif /* #if (defined(WIN32) || ... */ +#ifndef __WIN__ +#define __STDC_FORMAT_MACROS /* Enable C99 printf format macros */ +#include <inttypes.h> +#endif /* !__WIN__ */ + /* Following defines are to enable performance schema instrumentation in each of four InnoDB modules if HAVE_PSI_INTERFACE is defined. */ -#ifdef HAVE_PSI_INTERFACE +#if defined HAVE_PSI_INTERFACE && !defined UNIV_HOTBACKUP # define UNIV_PFS_MUTEX # define UNIV_PFS_RWLOCK /* For I/O instrumentation, performance schema rely @@ -149,8 +142,22 @@ resolved */ # define UNIV_PFS_IO # endif # define UNIV_PFS_THREAD + +/* There are mutexes/rwlocks that we want to exclude from +instrumentation even if their corresponding performance schema +define is set. And this PFS_NOT_INSTRUMENTED is used +as the key value to identify those objects that would +be excluded from instrumentation. */ +# define PFS_NOT_INSTRUMENTED ULINT32_UNDEFINED + +# define PFS_IS_INSTRUMENTED(key) ((key) != PFS_NOT_INSTRUMENTED) + #endif /* HAVE_PSI_INTERFACE */ +#ifdef __WIN__ +# define YY_NO_UNISTD_H 1 +#endif /* __WIN__ */ + /* DEBUG VERSION CONTROL ===================== */ @@ -178,8 +185,6 @@ command. Not tested on Windows. 
*/ debugging without UNIV_DEBUG */ #define UNIV_BLOB_LIGHT_DEBUG /* Enable off-page column debugging without UNIV_DEBUG */ -#define UNIV_BLOB_NULL_DEBUG /* Enable deep off-page - column debugging */ #define UNIV_DEBUG /* Enable ut_ad() assertions and disable UNIV_INLINE */ #define UNIV_DEBUG_LOCK_VALIDATE /* Enable @@ -200,6 +205,9 @@ assumes that no BLOBs survive server restart */ #define UNIV_IBUF_COUNT_DEBUG /* debug the insert buffer; this limits the database to IBUF_COUNT_N_SPACES and IBUF_COUNT_N_PAGES, and the insert buffer must be empty when the database is started */ +#define UNIV_PERF_DEBUG /* debug flag that enables + light weight performance + related stuff. */ #define UNIV_SYNC_DEBUG /* debug mutex and latch operations (very slow); also UNIV_DEBUG must be defined */ #define UNIV_SEARCH_DEBUG /* debug B-tree comparisons */ @@ -208,7 +216,7 @@ operations (very slow); also UNIV_DEBUG must be defined */ #define UNIV_SEARCH_PERF_STAT /* statistics for the adaptive hash index */ #define UNIV_SRV_PRINT_LATCH_WAITS /* enable diagnostic output - in sync0sync.c */ + in sync0sync.cc */ #define UNIV_BTR_PRINT /* enable functions for printing B-trees */ #define UNIV_ZIP_DEBUG /* extensive consistency checks @@ -218,6 +226,11 @@ operations (very slow); also UNIV_DEBUG must be defined */ #define UNIV_AIO_DEBUG /* prints info about submitted and reaped AIO requests to the log. */ +#define UNIV_STATS_DEBUG /* prints various stats + related debug info from + dict0stats.c */ +#define FTS_INTERNAL_DIAG_PRINT /* FTS internal debugging + info output */ #endif #define UNIV_BTR_DEBUG /* check B-tree links */ @@ -240,7 +253,9 @@ easy way to get it to work. See http://bugs.mysql.com/bug.php?id=52263. 
*/ #else # define UNIV_INTERN #endif -#if defined __GNUC__ && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 3) +#if defined(INNODB_COMPILER_HINTS) \ + && defined __GNUC__ \ + && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 3) /** Starting with GCC 4.3, the "cold" attribute is used to inform the compiler that a function is unlikely executed. The function is optimized for size rather than speed and on many targets it is placed @@ -257,37 +272,31 @@ rarely invoked function for size instead for speed. */ #ifndef UNIV_MUST_NOT_INLINE /* Definition for inline version */ -#ifdef __WIN__ -# define UNIV_INLINE __inline -#elif defined(__SUNPRO_CC) || defined(__SUNPRO_C) -# define UNIV_INLINE static inline -#else -# define UNIV_INLINE static __inline__ -#endif +#define UNIV_INLINE static inline -#else +#else /* !UNIV_MUST_NOT_INLINE */ /* If we want to compile a noninlined version we use the following macro definitions: */ #define UNIV_NONINL #define UNIV_INLINE UNIV_INTERN -#endif /* UNIV_DEBUG */ +#endif /* !UNIV_MUST_NOT_INLINE */ #ifdef _WIN32 #define UNIV_WORD_SIZE 4 #elif defined(_WIN64) #define UNIV_WORD_SIZE 8 #else -/* MySQL config.h generated by GNU autoconf will define SIZEOF_LONG in Posix */ +/** MySQL config.h generated by GNU autoconf will define SIZEOF_LONG in Posix */ #define UNIV_WORD_SIZE SIZEOF_LONG #endif -/* The following alignment is used in memory allocations in memory heap +/** The following alignment is used in memory allocations in memory heap management to ensure correct alignment for doubles etc. */ -#define UNIV_MEM_ALIGNMENT 8 +#define UNIV_MEM_ALIGNMENT 8 -/* The following alignment is used in aligning lints etc. */ +/** The following alignment is used in aligning lints etc. */ #define UNIV_WORD_ALIGNMENT UNIV_WORD_SIZE /* @@ -295,31 +304,93 @@ management to ensure correct alignment for doubles etc. 
*/ ======================== */ -/* The 2-logarithm of UNIV_PAGE_SIZE: */ -#define UNIV_PAGE_SIZE_SHIFT 14 -/* The universal page size of the database */ -#define UNIV_PAGE_SIZE (1 << UNIV_PAGE_SIZE_SHIFT) - -/* Maximum number of parallel threads in a parallelized operation */ +/** There are currently two InnoDB file formats which are used to group +features with similar restrictions and dependencies. Using an enum allows +switch statements to give a compiler warning when a new one is introduced. */ +enum innodb_file_formats_enum { + /** Antelope File Format: InnoDB/MySQL up to 5.1. + This format includes REDUNDANT and COMPACT row formats */ + UNIV_FORMAT_A = 0, + + /** Barracuda File Format: Introduced in InnoDB plugin for 5.1: + This format includes COMPRESSED and DYNAMIC row formats. It + includes the ability to create secondary indexes from data that + is not on the clustered index page and the ability to store more + data off the clustered index page. */ + UNIV_FORMAT_B = 1 +}; + +typedef enum innodb_file_formats_enum innodb_file_formats_t; + +/** Minimum supported file format */ +#define UNIV_FORMAT_MIN UNIV_FORMAT_A + +/** Maximum supported file format */ +#define UNIV_FORMAT_MAX UNIV_FORMAT_B + +/** The 2-logarithm of UNIV_PAGE_SIZE: */ +#define UNIV_PAGE_SIZE_SHIFT srv_page_size_shift + +/** The universal page size of the database */ +#define UNIV_PAGE_SIZE srv_page_size + +/** log2 of smallest compressed page size (1<<10 == 1024 bytes) +Note: This must never change! */ +#define UNIV_ZIP_SIZE_SHIFT_MIN 10 + +/** log2 of largest compressed page size (1<<14 == 16384 bytes). +A compressed page directory entry reserves 14 bits for the start offset +and 2 bits for flags. This limits the uncompressed page size to 16k. +Even though a 16k uncompressed page can theoretically be compressed +into a larger compressed page, it is not a useful feature so we will +limit both with this same constant. 
*/ +#define UNIV_ZIP_SIZE_SHIFT_MAX 14 + +/* Define the Min, Max, Default page sizes. */ +/** Minimum Page Size Shift (power of 2) */ +#define UNIV_PAGE_SIZE_SHIFT_MIN 12 +/** Maximum Page Size Shift (power of 2) */ +#define UNIV_PAGE_SIZE_SHIFT_MAX 14 +/** Default Page Size Shift (power of 2) */ +#define UNIV_PAGE_SIZE_SHIFT_DEF 14 +/** Original 16k InnoDB Page Size Shift, in case the default changes */ +#define UNIV_PAGE_SIZE_SHIFT_ORIG 14 + +/** Minimum page size InnoDB currently supports. */ +#define UNIV_PAGE_SIZE_MIN (1 << UNIV_PAGE_SIZE_SHIFT_MIN) +/** Maximum page size InnoDB currently supports. */ +#define UNIV_PAGE_SIZE_MAX (1 << UNIV_PAGE_SIZE_SHIFT_MAX) +/** Default page size for InnoDB tablespaces. */ +#define UNIV_PAGE_SIZE_DEF (1 << UNIV_PAGE_SIZE_SHIFT_DEF) +/** Original 16k page size for InnoDB tablespaces. */ +#define UNIV_PAGE_SIZE_ORIG (1 << UNIV_PAGE_SIZE_SHIFT_ORIG) + +/** Smallest compressed page size */ +#define UNIV_ZIP_SIZE_MIN (1 << UNIV_ZIP_SIZE_SHIFT_MIN) + +/** Largest compressed page size */ +#define UNIV_ZIP_SIZE_MAX (1 << UNIV_ZIP_SIZE_SHIFT_MAX) + +/** Number of supported page sizes (The convention 'ssize' is used +for 'log2 minus 9' or the number of shifts starting with 512.) +This number varies depending on UNIV_PAGE_SIZE. */ +#define UNIV_PAGE_SSIZE_MAX \ + (UNIV_PAGE_SIZE_SHIFT - UNIV_ZIP_SIZE_SHIFT_MIN + 1) + +/** Maximum number of parallel threads in a parallelized operation */ #define UNIV_MAX_PARALLELISM 32 -/** This is the "mbmaxlen" for my_charset_filename (defined in -strings/ctype-utf8.c), which is used to encode File and Database names. */ -#define FILENAME_CHARSET_MAXNAMLEN 5 - -/** The maximum length of an encode table name in bytes. The max -table and database names are NAME_CHAR_LEN (64) characters. After the -encoding, the max length would be NAME_CHAR_LEN (64) * -FILENAME_CHARSET_MAXNAMLEN (5) = 320 bytes. The number does not include a -terminating '\0'. 
InnoDB can handle longer names internally */ -#define MAX_TABLE_NAME_LEN 320 - +/** The maximum length of a table name. This is the MySQL limit and is +defined in mysql_com.h like NAME_CHAR_LEN*SYSTEM_CHARSET_MBMAXLEN, the +number does not include a terminating '\0'. InnoDB probably can handle +longer names internally */ +#define MAX_TABLE_NAME_LEN 192 -/* The maximum length of a database name. Like MAX_TABLE_NAME_LEN this is +/** The maximum length of a database name. Like MAX_TABLE_NAME_LEN this is the MySQL's NAME_LEN, see check_and_convert_db_name(). */ #define MAX_DATABASE_NAME_LEN MAX_TABLE_NAME_LEN -/* MAX_FULL_NAME_LEN defines the full name path including the +/** MAX_FULL_NAME_LEN defines the full name path including the database name and table name. In addition, 14 bytes is added for: 2 for surrounding quotes around table name 1 for the separating dot (.) @@ -335,39 +406,41 @@ database name and table name. In addition, 14 bytes is added for: /* Note that inside MySQL 'byte' is defined as char on Linux! */ #define byte unsigned char -/* Define an unsigned integer type that is exactly 32 bits. */ - -#if SIZEOF_INT == 4 -typedef unsigned int ib_uint32_t; -#elif SIZEOF_LONG == 4 -typedef unsigned long ib_uint32_t; -#else -#error "Neither int or long is 4 bytes" -#endif - /* Another basic type we use is unsigned long integer which should be equal to the word size of the machine, that is on a 32-bit platform 32 bits, and on a 64-bit platform 64 bits. We also give the printf format for the type as a macro ULINTPF. */ + +#ifdef __WIN__ +/* Use the integer types and formatting strings defined in Visual Studio. */ +# define UINT32PF "%I32u" +# define INT64PF "%I64d" +# define UINT64PF "%I64u" +typedef __int64 ib_int64_t; +typedef unsigned __int64 ib_uint64_t; +typedef unsigned __int32 ib_uint32_t; +#else +/* Use the integer types and formatting strings defined in the C99 standard. 
*/ +# define UINT32PF "%"PRIu32 +# define INT64PF "%"PRId64 +# define UINT64PF "%"PRIu64 +typedef int64_t ib_int64_t; +typedef uint64_t ib_uint64_t; +typedef uint32_t ib_uint32_t; +# endif /* __WIN__ */ + +# define IB_ID_FMT UINT64PF + #ifdef _WIN64 typedef unsigned __int64 ulint; -#define ULINTPF "%I64u" typedef __int64 lint; +# define ULINTPF UINT64PF #else typedef unsigned long int ulint; -#define ULINTPF "%lu" typedef long int lint; -#endif - -#ifdef __WIN__ -typedef __int64 ib_int64_t; -typedef unsigned __int64 ib_uint64_t; -#elif !defined(UNIV_HOTBACKUP) -/* Note: longlong and ulonglong come from MySQL headers. */ -typedef longlong ib_int64_t; -typedef ulonglong ib_uint64_t; -#endif +# define ULINTPF "%lu" +#endif /* _WIN64 */ #ifndef UNIV_HOTBACKUP typedef unsigned long long int ullint; @@ -379,27 +452,33 @@ typedef unsigned long long int ullint; #endif #endif -/* The 'undefined' value for a ulint */ +/** The 'undefined' value for a ulint */ #define ULINT_UNDEFINED ((ulint)(-1)) +#define ULONG_UNDEFINED ((ulong)(-1)) + +/** The 'undefined' value for a ib_uint64_t */ +#define UINT64_UNDEFINED ((ib_uint64_t)(-1)) + /** The bitmask of 32-bit unsigned integer */ #define ULINT32_MASK 0xFFFFFFFF -/* The undefined 32-bit unsigned integer */ +/** The undefined 32-bit unsigned integer */ #define ULINT32_UNDEFINED ULINT32_MASK -/* Maximum value for a ulint */ +/** Maximum value for a ulint */ #define ULINT_MAX ((ulint)(-2)) -/* Maximum value for ib_uint64_t */ +/** Maximum value for ib_uint64_t */ #define IB_ULONGLONG_MAX ((ib_uint64_t) (~0ULL)) +#define IB_UINT64_MAX IB_ULONGLONG_MAX /** The generic InnoDB system object identifier data type */ typedef ib_uint64_t ib_id_t; -/* The 'undefined' value for a ullint */ +/** The 'undefined' value for a ullint */ #define ULLINT_UNDEFINED ((ullint)(-1)) -/* This 'ibool' type is used within Innobase. Remember that different included +/** This 'ibool' type is used within Innobase. 
Remember that different included headers may define 'bool' differently. Do not assume that 'bool' is a ulint! */ #define ibool ulint @@ -410,7 +489,7 @@ headers may define 'bool' differently. Do not assume that 'bool' is a ulint! */ #endif -/* The following number as the length of a logical field means that the field +/** The following number as the length of a logical field means that the field has the SQL NULL as its value. NOTE that because we assume that the length of a field is a 32-bit integer when we store it, for example, to an undo log on disk, we must have also this number fit in 32 bits, also in 64-bit @@ -418,15 +497,23 @@ computers! */ #define UNIV_SQL_NULL ULINT32_UNDEFINED -/* Lengths which are not UNIV_SQL_NULL, but bigger than the following +/** Lengths which are not UNIV_SQL_NULL, but bigger than the following number indicate that a field contains a reference to an externally stored part of the field in the tablespace. The length field then contains the sum of the following flag and the locally stored len. */ -#define UNIV_EXTERN_STORAGE_FIELD (UNIV_SQL_NULL - UNIV_PAGE_SIZE) +#define UNIV_EXTERN_STORAGE_FIELD (UNIV_SQL_NULL - UNIV_PAGE_SIZE_MAX) -/* Some macros to improve branch prediction and reduce cache misses */ #if defined(__GNUC__) && (__GNUC__ > 2) && ! defined(__INTEL_COMPILER) +#define HAVE_GCC_GT_2 +/* Tell the compiler that variable/function is unused. */ +# define UNIV_UNUSED __attribute__ ((unused)) +#else +# define UNIV_UNUSED +#endif /* CHECK FOR GCC VER_GT_2 */ + +/* Some macros to improve branch prediction and reduce cache misses */ +#if defined(INNODB_COMPILER_HINTS) && defined(HAVE_GCC_GT_2) /* Tell the compiler that 'expr' probably evaluates to 'constant'. */ # define UNIV_EXPECT(expr,constant) __builtin_expect(expr, constant) /* Tell the compiler that a pointer is likely to be NULL */ @@ -437,19 +524,30 @@ it is read. */ /* Minimize cache-miss latency by moving data at addr into a cache before it is read or written. 
*/ # define UNIV_PREFETCH_RW(addr) __builtin_prefetch(addr, 1, 3) + /* Sun Studio includes sun_prefetch.h as of version 5.9 */ #elif (defined(__SUNPRO_C) && __SUNPRO_C >= 0x590) \ || (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x590) + # include <sun_prefetch.h> + #if __SUNPRO_C >= 0x550 # undef UNIV_INTERN # define UNIV_INTERN __hidden #endif /* __SUNPRO_C >= 0x550 */ -/* Use sun_prefetch when compile with Sun Studio */ + # define UNIV_EXPECT(expr,value) (expr) # define UNIV_LIKELY_NULL(expr) (expr) -# define UNIV_PREFETCH_R(addr) sun_prefetch_read_many((void*) addr) -# define UNIV_PREFETCH_RW(addr) sun_prefetch_write_many(addr) + +# if defined(INNODB_COMPILER_HINTS) +//# define UNIV_PREFETCH_R(addr) sun_prefetch_read_many((void*) addr) +# define UNIV_PREFETCH_R(addr) ((void) 0) +# define UNIV_PREFETCH_RW(addr) sun_prefetch_write_many(addr) +# else +# define UNIV_PREFETCH_R(addr) ((void) 0) +# define UNIV_PREFETCH_RW(addr) ((void) 0) +# endif /* INNODB_COMPILER_HINTS */ + #else /* Dummy versions of the macros */ # define UNIV_EXPECT(expr,value) (expr) @@ -457,6 +555,7 @@ it is read or written. 
*/ # define UNIV_PREFETCH_R(addr) ((void) 0) # define UNIV_PREFETCH_RW(addr) ((void) 0) #endif + /* Tell the compiler that cond is likely to hold */ #define UNIV_LIKELY(cond) UNIV_EXPECT(cond, TRUE) /* Tell the compiler that cond is unlikely to hold */ @@ -487,7 +586,7 @@ typedef void* os_thread_ret_t; # define UNIV_MEM_INVALID(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size) # define UNIV_MEM_FREE(addr, size) VALGRIND_MAKE_MEM_NOACCESS(addr, size) # define UNIV_MEM_ALLOC(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size) -# define UNIV_MEM_DESC(addr, size, b) VALGRIND_CREATE_BLOCK(addr, size, b) +# define UNIV_MEM_DESC(addr, size) VALGRIND_CREATE_BLOCK(addr, size, #addr) # define UNIV_MEM_UNDESC(b) VALGRIND_DISCARD(b) # define UNIV_MEM_ASSERT_RW(addr, size) do { \ const void* _p = (const void*) (ulint) \ @@ -512,7 +611,7 @@ typedef void* os_thread_ret_t; # define UNIV_MEM_INVALID(addr, size) do {} while(0) # define UNIV_MEM_FREE(addr, size) do {} while(0) # define UNIV_MEM_ALLOC(addr, size) do {} while(0) -# define UNIV_MEM_DESC(addr, size, b) do {} while(0) +# define UNIV_MEM_DESC(addr, size) do {} while(0) # define UNIV_MEM_UNDESC(b) do {} while(0) # define UNIV_MEM_ASSERT_RW(addr, size) do {} while(0) # define UNIV_MEM_ASSERT_W(addr, size) do {} while(0) @@ -526,4 +625,7 @@ typedef void* os_thread_ret_t; UNIV_MEM_ALLOC(addr, size); \ } while (0) +extern ulong srv_page_size_shift; +extern ulong srv_page_size; + #endif diff --git a/storage/innobase/include/usr0sess.h b/storage/innobase/include/usr0sess.h index 2c288f7d455..4a0710c5060 100644 --- a/storage/innobase/include/usr0sess.h +++ b/storage/innobase/include/usr0sess.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -51,7 +51,8 @@ sess_close( /*=======*/ sess_t* sess); /* in, own: session object */ -/* The session handle. All fields are protected by the kernel mutex */ +/* The session handle. This data structure is only used by purge and is +not really necessary. We should get rid of it. */ struct sess_struct{ ulint state; /*!< state of the session */ trx_t* trx; /*!< transaction object permanently diff --git a/storage/innobase/include/usr0sess.ic b/storage/innobase/include/usr0sess.ic index 35a75d75acc..284e59537fe 100644 --- a/storage/innobase/include/usr0sess.ic +++ b/storage/innobase/include/usr0sess.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/usr0types.h b/storage/innobase/include/usr0types.h index 6cc6f015613..403ad0223a8 100644 --- a/storage/innobase/include/usr0types.h +++ b/storage/innobase/include/usr0types.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/ut0bh.h b/storage/innobase/include/ut0bh.h index 1b211390283..4c029e256a9 100644 --- a/storage/innobase/include/ut0bh.h +++ b/storage/innobase/include/ut0bh.h @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/ut0bh.ic b/storage/innobase/include/ut0bh.ic index afbe58e7e3b..a604237665d 100644 --- a/storage/innobase/include/ut0bh.ic +++ b/storage/innobase/include/ut0bh.ic @@ -1,5 +1,6 @@ /***************************************************************************//** -Copyright (c) 2011, Oracle Corpn. All Rights Reserved. + +Copyright (c) 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -10,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -122,4 +123,3 @@ ib_bh_last( : ib_bh_get(ib_bh, ib_bh_size(ib_bh) - 1)); } - diff --git a/storage/innobase/include/ut0byte.h b/storage/innobase/include/ut0byte.h index b99d7175b94..5bdd553ca80 100644 --- a/storage/innobase/include/ut0byte.h +++ b/storage/innobase/include/ut0byte.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. 
All Rights Reserved. +Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -32,7 +32,7 @@ Created 1/20/1994 Heikki Tuuri /*******************************************************//** Creates a 64-bit integer out of two 32-bit integers. -@return created dulint */ +@return created integer */ UNIV_INLINE ib_uint64_t ut_ull_create( diff --git a/storage/innobase/include/ut0byte.ic b/storage/innobase/include/ut0byte.ic index e7908efa41a..873d98c727e 100644 --- a/storage/innobase/include/ut0byte.ic +++ b/storage/innobase/include/ut0byte.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -25,7 +25,7 @@ Created 5/30/1994 Heikki Tuuri /*******************************************************//** Creates a 64-bit integer out of two 32-bit integers. -@return created dulint */ +@return created integer */ UNIV_INLINE ib_uint64_t ut_ull_create( @@ -90,7 +90,7 @@ ut_align( ut_ad(sizeof(void*) == sizeof(ulint)); - return((void*)((((ulint)ptr) + align_no - 1) & ~(align_no - 1))); + return((void*)((((ulint) ptr) + align_no - 1) & ~(align_no - 1))); } /*********************************************************//** @@ -110,7 +110,7 @@ ut_align_down( ut_ad(sizeof(void*) == sizeof(ulint)); - return((void*)((((ulint)ptr)) & ~(align_no - 1))); + return((void*)((((ulint) ptr)) & ~(align_no - 1))); } /*********************************************************//** @@ -130,7 +130,7 @@ ut_align_offset( ut_ad(sizeof(void*) == sizeof(ulint)); - return(((ulint)ptr) & (align_no - 1)); + return(((ulint) ptr) & (align_no - 1)); } /*****************************************************************//** diff --git a/storage/innobase/include/ut0crc32.h b/storage/innobase/include/ut0crc32.h new file mode 100644 index 00000000000..456648001aa --- /dev/null +++ b/storage/innobase/include/ut0crc32.h @@ -0,0 +1,48 @@ +/***************************************************************************** + +Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file include/ut0crc32.h +CRC32 implementation + +Created Aug 10, 2011 Vasil Dimov +*******************************************************/ + +#ifndef ut0crc32_h +#define ut0crc32_h + +#include "univ.i" + +/********************************************************************//** +Initializes the data structures used by ut_crc32(). Does not do any +allocations, would not hurt if called twice, but would be pointless. */ +UNIV_INTERN +void +ut_crc32_init(); +/*===========*/ + +/********************************************************************//** +Calculates CRC32. +@param ptr - data over which to calculate CRC32. +@param len - data length in bytes. +@return CRC32 (CRC-32C, using the GF(2) primitive polynomial 0x11EDC6F41, +or 0x1EDC6F41 without the high-order bit) */ +typedef ib_uint32_t (*ib_ut_crc32_t)(const byte* ptr, ulint len); + +extern ib_ut_crc32_t ut_crc32; +#endif /* ut0crc32_h */ diff --git a/storage/innobase/include/ut0dbg.h b/storage/innobase/include/ut0dbg.h index ce6dcb63049..e9ad62fb81b 100644 --- a/storage/innobase/include/ut0dbg.h +++ b/storage/innobase/include/ut0dbg.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -26,6 +26,12 @@ Created 1/30/1994 Heikki Tuuri #ifndef ut0dbg_h #define ut0dbg_h +#ifdef UNIV_INNOCHECKSUM +#define ut_a assert +#define ut_ad assert +#define ut_error assert(0) +#else /* !UNIV_INNOCHECKSUM */ + #include "univ.i" #include <stdlib.h> #include "os0thread.h" @@ -163,4 +169,6 @@ speedo_show( #endif /* UNIV_COMPILE_TEST_FUNCS */ +#endif /* !UNIV_INNOCHECKSUM */ + #endif diff --git a/storage/innobase/include/ut0list.h b/storage/innobase/include/ut0list.h index ec67f4e2a0f..57d6bdc33a6 100644 --- a/storage/innobase/include/ut0list.h +++ b/storage/innobase/include/ut0list.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -142,6 +142,15 @@ ib_list_get_last( /*=============*/ ib_list_t* list); /*!< in: list */ +/******************************************************************** +Check if list is empty. */ +UNIV_INLINE +ibool +ib_list_is_empty( +/*=============*/ + /* out: TRUE if empty else FALSE */ + const ib_list_t* list); /* in: list */ + /* List. */ struct ib_list_struct { ib_list_node_t* first; /*!< first node */ diff --git a/storage/innobase/include/ut0list.ic b/storage/innobase/include/ut0list.ic index eb5c62796e8..d9dcb2eac99 100644 --- a/storage/innobase/include/ut0list.ic +++ b/storage/innobase/include/ut0list.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -46,3 +46,15 @@ ib_list_get_last( { return(list->last); } + +/******************************************************************** +Check if list is empty. */ +UNIV_INLINE +ibool +ib_list_is_empty( +/*=============*/ + /* out: TRUE if empty else FALSE */ + const ib_list_t* list) /* in: list */ +{ + return(!(list->first || list->last)); +} diff --git a/storage/innobase/include/ut0lst.h b/storage/innobase/include/ut0lst.h index bb295ea1b22..51c89f15a77 100644 --- a/storage/innobase/include/ut0lst.h +++ b/storage/innobase/include/ut0lst.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -28,10 +28,17 @@ Created 9/10/1995 Heikki Tuuri #include "univ.i" +/*******************************************************************//** +Return offset of F in POD T. +@param T - POD pointer +@param F - Field in T */ +#define IB_OFFSETOF(T, F) \ + (reinterpret_cast<byte*>(&(T)->F) - reinterpret_cast<byte*>(T)) + /* This module implements the two-way linear list which should be used if a list is used in the database. Note that a single struct may belong to two or more lists, provided that the list are given different names. -An example of the usage of the lists can be found in fil0fil.c. */ +An example of the usage of the lists can be found in fil0fil.cc. */ /*******************************************************************//** This macro expands to the unnamed type definition of a struct which acts @@ -39,12 +46,16 @@ as the two-way list base node. The base node contains pointers to both ends of the list and a count of nodes in the list (excluding the base node from the count). 
@param TYPE the name of the list node data type */ -#define UT_LIST_BASE_NODE_T(TYPE)\ -struct {\ - ulint count; /*!< count of nodes in list */\ - TYPE * start; /*!< pointer to list start, NULL if empty */\ - TYPE * end; /*!< pointer to list end, NULL if empty */\ -}\ +template <typename TYPE> +struct ut_list_base { + typedef TYPE elem_type; + + ulint count; /*!< count of nodes in list */ + TYPE* start; /*!< pointer to list start, NULL if empty */ + TYPE* end; /*!< pointer to list end, NULL if empty */ +}; + +#define UT_LIST_BASE_NODE_T(TYPE) ut_list_base<TYPE> /*******************************************************************//** This macro expands to the unnamed type definition of a struct which @@ -62,12 +73,29 @@ struct LRU_node_struct { The example implements an LRU list of name LRU_list. Its nodes are of type LRU_node_t. */ -#define UT_LIST_NODE_T(TYPE)\ -struct {\ - TYPE * prev; /*!< pointer to the previous node,\ - NULL if start of list */\ - TYPE * next; /*!< pointer to next node, NULL if end of list */\ -}\ +template <typename TYPE> +struct ut_list_node { + TYPE* prev; /*!< pointer to the previous node, + NULL if start of list */ + TYPE* next; /*!< pointer to next node, NULL if end of list */ +}; + +#define UT_LIST_NODE_T(TYPE) ut_list_node<TYPE> + +/*******************************************************************//** +Get the list node at offset. +@param elem - list element +@param offset - offset within element. +@return reference to list node. */ +template <typename Type> +ut_list_node<Type>& +ut_elem_get_node(Type& elem, size_t offset) +{ + ut_a(offset < sizeof(elem)); + + return(*reinterpret_cast<ut_list_node<Type>*>( + reinterpret_cast<byte*>(&elem) + offset)); +} /*******************************************************************//** Initializes the base node of a two-way list. @@ -82,108 +110,197 @@ Initializes the base node of a two-way list. 
/*******************************************************************//** Adds the node as the first element in a two-way linked list. +@param list the base node (not a pointer to it) +@param elem the element to add +@param offset offset of list node in elem. */ +template <typename List, typename Type> +void +ut_list_prepend( + List& list, + Type& elem, + size_t offset) +{ + ut_list_node<Type>& elem_node = ut_elem_get_node(elem, offset); + + elem_node.prev = 0; + elem_node.next = list.start; + + if (list.start != 0) { + ut_list_node<Type>& base_node = + ut_elem_get_node(*list.start, offset); + + ut_ad(list.start != &elem); + + base_node.prev = &elem; + } + + list.start = &elem; + + if (list.end == 0) { + list.end = &elem; + } + + ++list.count; +} + +/*******************************************************************//** +Adds the node as the first element in a two-way linked list. @param NAME list name -@param BASE the base node (not a pointer to it) -@param N pointer to the node to be added to the list. -*/ -#define UT_LIST_ADD_FIRST(NAME, BASE, N)\ -{\ - ut_ad(N);\ - ((BASE).count)++;\ - ((N)->NAME).next = (BASE).start;\ - ((N)->NAME).prev = NULL;\ - if (UNIV_LIKELY((BASE).start != NULL)) {\ - ut_ad((BASE).start != (N));\ - (((BASE).start)->NAME).prev = (N);\ - }\ - (BASE).start = (N);\ - if (UNIV_UNLIKELY((BASE).end == NULL)) {\ - (BASE).end = (N);\ - }\ -}\ +@param LIST the base node (not a pointer to it) +@param ELEM the element to add */ +#define UT_LIST_ADD_FIRST(NAME, LIST, ELEM) \ + ut_list_prepend(LIST, *ELEM, IB_OFFSETOF(ELEM, NAME)) + +/*******************************************************************//** +Adds the node as the last element in a two-way linked list. 
+@param list list +@param elem the element to add +@param offset offset of list node in elem */ +template <typename List, typename Type> +void +ut_list_append( + List& list, + Type& elem, + size_t offset) +{ + ut_list_node<Type>& elem_node = ut_elem_get_node(elem, offset); + + elem_node.next = 0; + elem_node.prev = list.end; + + if (list.end != 0) { + ut_list_node<Type>& base_node = + ut_elem_get_node(*list.end, offset); + + ut_ad(list.end != &elem); + + base_node.next = &elem; + } + + list.end = &elem; + + if (list.start == 0) { + list.start = &elem; + } + + ++list.count; +} /*******************************************************************//** Adds the node as the last element in a two-way linked list. @param NAME list name -@param BASE the base node (not a pointer to it) -@param N pointer to the node to be added to the list -*/ -#define UT_LIST_ADD_LAST(NAME, BASE, N)\ -{\ - ut_ad(N != NULL);\ - ((BASE).count)++;\ - ((N)->NAME).prev = (BASE).end;\ - ((N)->NAME).next = NULL;\ - if ((BASE).end != NULL) {\ - ut_ad((BASE).end != (N));\ - (((BASE).end)->NAME).next = (N);\ - }\ - (BASE).end = (N);\ - if ((BASE).start == NULL) {\ - (BASE).start = (N);\ - }\ -}\ +@param LIST list +@param ELEM the element to add */ +#define UT_LIST_ADD_LAST(NAME, LIST, ELEM)\ + ut_list_append(LIST, *ELEM, IB_OFFSETOF(ELEM, NAME)) /*******************************************************************//** -Inserts a NODE2 after NODE1 in a list. +Inserts a ELEM2 after ELEM1 in a list. 
+@param list the base node +@param elem1 node after which elem2 is inserted +@param elem2 node being inserted after elem1 +@param offset offset of list node in elem1 and elem2 */ +template <typename List, typename Type> +void +ut_list_insert( + List& list, + Type& elem1, + Type& elem2, + size_t offset) +{ + ut_ad(&elem1 != &elem2); + + ut_list_node<Type>& elem1_node = ut_elem_get_node(elem1, offset); + ut_list_node<Type>& elem2_node = ut_elem_get_node(elem2, offset); + + elem2_node.prev = &elem1; + elem2_node.next = elem1_node.next; + + if (elem1_node.next != NULL) { + ut_list_node<Type>& next_node = + ut_elem_get_node(*elem1_node.next, offset); + + next_node.prev = &elem2; + } + + elem1_node.next = &elem2; + + if (list.end == &elem1) { + list.end = &elem2; + } + + ++list.count; +} + +/*******************************************************************//** +Inserts a ELEM2 after ELEM1 in a list. @param NAME list name -@param BASE the base node (not a pointer to it) -@param NODE1 pointer to node after which NODE2 is inserted -@param NODE2 pointer to node being inserted after NODE1 -*/ -#define UT_LIST_INSERT_AFTER(NAME, BASE, NODE1, NODE2)\ -{\ - ut_ad(NODE1);\ - ut_ad(NODE2);\ - ut_ad((NODE1) != (NODE2));\ - ((BASE).count)++;\ - ((NODE2)->NAME).prev = (NODE1);\ - ((NODE2)->NAME).next = ((NODE1)->NAME).next;\ - if (((NODE1)->NAME).next != NULL) {\ - ((((NODE1)->NAME).next)->NAME).prev = (NODE2);\ - }\ - ((NODE1)->NAME).next = (NODE2);\ - if ((BASE).end == (NODE1)) {\ - (BASE).end = (NODE2);\ - }\ -}\ +@param LIST the base node +@param ELEM1 node after which ELEM2 is inserted +@param ELEM2 node being inserted after ELEM1 */ +#define UT_LIST_INSERT_AFTER(NAME, LIST, ELEM1, ELEM2)\ + ut_list_insert(LIST, *ELEM1, *ELEM2, IB_OFFSETOF(ELEM1, NAME)) #ifdef UNIV_LIST_DEBUG /** Invalidate the pointers in a list node.
@param NAME list name @param N pointer to the node that was removed */ -# define UT_LIST_REMOVE_CLEAR(NAME, N) \ -((N)->NAME.prev = (N)->NAME.next = (void*) -1) +# define UT_LIST_REMOVE_CLEAR(N) \ + (N).next = (Type*) -1; \ + (N).prev = (N).next #else /** Invalidate the pointers in a list node. @param NAME list name @param N pointer to the node that was removed */ -# define UT_LIST_REMOVE_CLEAR(NAME, N) while (0) -#endif +# define UT_LIST_REMOVE_CLEAR(N) +#endif /* UNIV_LIST_DEBUG */ /*******************************************************************//** Removes a node from a two-way linked list. -@param NAME list name -@param BASE the base node (not a pointer to it) -@param N pointer to the node to be removed from the list -*/ -#define UT_LIST_REMOVE(NAME, BASE, N) \ -do { \ - ut_ad(N); \ - ut_a((BASE).count > 0); \ - ((BASE).count)--; \ - if (((N)->NAME).next != NULL) { \ - ((((N)->NAME).next)->NAME).prev = ((N)->NAME).prev; \ - } else { \ - (BASE).end = ((N)->NAME).prev; \ - } \ - if (((N)->NAME).prev != NULL) { \ - ((((N)->NAME).prev)->NAME).next = ((N)->NAME).next; \ - } else { \ - (BASE).start = ((N)->NAME).next; \ - } \ - UT_LIST_REMOVE_CLEAR(NAME, N); \ -} while (0) +@param list the base node (not a pointer to it) +@param elem node to be removed from the list +@param offset offset of list node within elem */ +template <typename List, typename Type> +void +ut_list_remove( + List& list, + Type& elem, + size_t offset) +{ + ut_list_node<Type>& elem_node = ut_elem_get_node(elem, offset); + + ut_a(list.count > 0); + + if (elem_node.next != NULL) { + ut_list_node<Type>& next_node = + ut_elem_get_node(*elem_node.next, offset); + + next_node.prev = elem_node.prev; + } else { + list.end = elem_node.prev; + } + + if (elem_node.prev != NULL) { + ut_list_node<Type>& prev_node = + ut_elem_get_node(*elem_node.prev, offset); + + prev_node.next = elem_node.next; + } else { + list.start = elem_node.next; + } + + UT_LIST_REMOVE_CLEAR(elem_node); + + --list.count; +} + 
+/*******************************************************************//** +Removes a node from a two-way linked list. +@param NAME list name +@param LIST the base node (not a pointer to it) +@param ELEM node to be removed from the list */ +#define UT_LIST_REMOVE(NAME, LIST, ELEM) \ + ut_list_remove(LIST, *ELEM, IB_OFFSETOF(ELEM, NAME)) /********************************************************************//** Gets the next node in a two-way list. @@ -223,39 +340,70 @@ Gets the last node in a two-way list. #define UT_LIST_GET_LAST(BASE)\ (BASE).end +struct NullValidate { void operator()(const void* elem) { } }; + +/********************************************************************//** +Iterate over all the elements and call the functor for each element. +@param list base node (not a pointer to it) +@param functor Functor that is called for each element in the list +@param node pointer to member node within list element */ +template <typename List, class Functor> +void +ut_list_map( + List& list, + ut_list_node<typename List::elem_type> + List::elem_type::*node, + Functor functor) +{ + ulint count = 0; + + for (typename List::elem_type* elem = list.start; + elem != 0; + elem = (elem->*node).next, ++count) { + + functor(elem); + } + + ut_a(count == list.count); +} + +/********************************************************************//** +Checks the consistency of a two-way list.
+@param list base node (not a pointer to it) +@param functor Functor that is called for each element in the list +@param node pointer to member node within list element */ +template <typename List, class Functor> +void +ut_list_validate( + List& list, + ut_list_node<typename List::elem_type> + List::elem_type::*node, + Functor functor = NullValidate()) +{ + ut_list_map(list, node, functor); + + ulint count = 0; + + for (typename List::elem_type* elem = list.end; + elem != 0; + elem = (elem->*node).prev, ++count) { + + functor(elem); + } + + ut_a(count == list.count); +} + /********************************************************************//** Checks the consistency of a two-way list. @param NAME the name of the list @param TYPE node type -@param BASE base node (not a pointer to it) -@param ASSERTION a condition on ut_list_node_313 */ -#define UT_LIST_VALIDATE(NAME, TYPE, BASE, ASSERTION) \ -do { \ - ulint ut_list_i_313; \ - TYPE* ut_list_node_313; \ - \ - ut_list_node_313 = (BASE).start; \ - \ - for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \ - ut_a(ut_list_node_313); \ - ASSERTION; \ - ut_ad((ut_list_node_313->NAME).next || !ut_list_i_313); \ - ut_list_node_313 = (ut_list_node_313->NAME).next; \ - } \ - \ - ut_a(ut_list_node_313 == NULL); \ - \ - ut_list_node_313 = (BASE).end; \ - \ - for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \ - ut_a(ut_list_node_313); \ - ASSERTION; \ - ut_ad((ut_list_node_313->NAME).prev || !ut_list_i_313); \ - ut_list_node_313 = (ut_list_node_313->NAME).prev; \ - } \ - \ - ut_a(ut_list_node_313 == NULL); \ -} while (0) - -#endif +@param LIST base node (not a pointer to it) +@param FUNCTOR called for each list element */ +#define UT_LIST_VALIDATE(NAME, TYPE, LIST, FUNCTOR) \ + ut_list_validate(LIST, &TYPE::NAME, FUNCTOR) + +#define UT_LIST_CHECK(NAME, TYPE, LIST) \ + ut_list_validate(LIST, &TYPE::NAME, NullValidate()) +#endif /* ut0lst.h */ diff --git a/storage/innobase/include/ut0mem.h
b/storage/innobase/include/ut0mem.h index 39f5f20dc6d..af7eb4e9b1d 100644 --- a/storage/innobase/include/ut0mem.h +++ b/storage/innobase/include/ut0mem.h @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -101,7 +101,7 @@ ut_free( void* ptr); /*!< in, own: memory block, can be NULL */ #ifndef UNIV_HOTBACKUP /**********************************************************************//** -Implements realloc. This is needed by /pars/lexyy.c. Otherwise, you should not +Implements realloc. This is needed by /pars/lexyy.cc. Otherwise, you should not use this function because the allocation functions in mem0mem.h are the recommended ones in InnoDB. @@ -211,6 +211,18 @@ ut_strreplace( const char* s1, /*!< in: string to replace */ const char* s2); /*!< in: string to replace s1 with */ +/******************************************************************** +Concatenate 3 strings.*/ + +char* +ut_str3cat( +/*=======*/ + /* out, own: concatenated string, must be + freed with mem_free() */ + const char* s1, /* in: string 1 */ + const char* s2, /* in: string 2 */ + const char* s3); /* in: string 3 */ + /**********************************************************************//** Converts a raw binary data to a NUL-terminated hex string. 
The output is truncated if there is not enough space in "hex", make sure "hex_size" is at diff --git a/storage/innobase/include/ut0mem.ic b/storage/innobase/include/ut0mem.ic index c06e2b3ae81..5c9071d52cc 100644 --- a/storage/innobase/include/ut0mem.ic +++ b/storage/innobase/include/ut0mem.ic @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -280,7 +280,7 @@ ut_str_sql_format( switch (ch) { case '\0': - if (UNIV_UNLIKELY(buf_size - buf_i < 4)) { + if (buf_size - buf_i < 4) { goto func_exit; } @@ -292,7 +292,7 @@ ut_str_sql_format( case '\'': case '\\': - if (UNIV_UNLIKELY(buf_size - buf_i < 4)) { + if (buf_size - buf_i < 4) { goto func_exit; } diff --git a/storage/innobase/include/ut0rbt.h b/storage/innobase/include/ut0rbt.h index e26b637ae13..e8a4430e76b 100644 --- a/storage/innobase/include/ut0rbt.h +++ b/storage/innobase/include/ut0rbt.h @@ -1,12 +1,6 @@ /***************************************************************************//** -Copyright (c) 2007, 2010, Innobase Oy. All Rights Reserved. - -Portions of this file contain modifications contributed and copyrighted by -Sun Microsystems, Inc. Those modifications are gratefully acknowledged and -are described briefly in the InnoDB documentation. The contributions by -Sun Microsystems are incorporated with their permission, and subject to the -conditions contained in the file COPYING.Sun_Microsystems. +Copyright (c) 2007, 2010, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -17,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ /******************************************************************//** @@ -57,6 +51,7 @@ typedef struct ib_rbt_node_struct ib_rbt_node_t; typedef struct ib_rbt_bound_struct ib_rbt_bound_t; typedef void (*ib_rbt_print_node)(const ib_rbt_node_t* node); typedef int (*ib_rbt_compare)(const void* p1, const void* p2); +typedef int (*ib_rbt_arg_compare)(const void*, const void* p1, const void* p2); /** Red black tree color types */ enum ib_rbt_color_enum { @@ -90,7 +85,11 @@ struct ib_rbt_struct { ulint n_nodes; /* Total number of data nodes */ ib_rbt_compare compare; /* Fn. to use for comparison */ + ib_rbt_arg_compare + compare_with_arg; /* Fn. 
to use for comparison + with argument */ ulint sizeof_value; /* Sizeof the item in bytes */ + const void* cmp_arg; /* Compare func argument */ }; /** The result of searching for a key in the tree, this is useful for @@ -133,6 +132,18 @@ rbt_create( size_t sizeof_value, /*!< in: size in bytes */ ib_rbt_compare compare); /*!< in: comparator */ /**********************************************************************//** +Create an instance of a red black tree, whose comparison function takes +an argument +@return rb tree instance */ +UNIV_INTERN +ib_rbt_t* +rbt_create_arg_cmp( +/*===============*/ + size_t sizeof_value, /*!< in: size in bytes */ + ib_rbt_arg_compare + compare, /*!< in: comparator */ + const void* cmp_arg); /*!< in: compare fn arg */ +/**********************************************************************//** Delete a node from the red black tree, identified by key */ UNIV_INTERN ibool @@ -265,7 +276,10 @@ rbt_search_cmp( const ib_rbt_t* tree, /*!< in: rb tree */ ib_rbt_bound_t* parent, /*!< in: search bounds */ const void* key, /*!< in: key to search */ - ib_rbt_compare compare); /*!< in: comparator */ + ib_rbt_compare compare, /*!< in: comparator */ + ib_rbt_arg_compare + arg_compare); /*!< in: fn to compare items + with argument */ /**********************************************************************//** Clear the tree, deletes (and free's) all the nodes. */ UNIV_INTERN diff --git a/storage/innobase/include/ut0rnd.h b/storage/innobase/include/ut0rnd.h index 946b1117af7..53b769849a5 100644 --- a/storage/innobase/include/ut0rnd.h +++ b/storage/innobase/include/ut0rnd.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -28,6 +28,8 @@ Created 1/20/1994 Heikki Tuuri #include "univ.i" +#ifndef UNIV_INNOCHECKSUM + #include "ut0byte.h" /** The 'character code' for end of field or string (used @@ -87,16 +89,6 @@ ut_hash_ulint( ulint key, /*!< in: value to be hashed */ ulint table_size); /*!< in: hash table size */ /*************************************************************//** -Folds a pair of ulints. -@return folded value */ -UNIV_INLINE -ulint -ut_fold_ulint_pair( -/*===============*/ - ulint n1, /*!< in: ulint */ - ulint n2) /*!< in: ulint */ - __attribute__((const)); -/*************************************************************//** Folds a 64-bit integer. @return folded value */ UNIV_INLINE @@ -114,16 +106,6 @@ ut_fold_string( /*===========*/ const char* str) /*!< in: null-terminated string */ __attribute__((pure)); -/*************************************************************//** -Folds a binary string. -@return folded value */ -UNIV_INLINE -ulint -ut_fold_binary( -/*===========*/ - const byte* str, /*!< in: string of bytes */ - ulint len) /*!< in: length */ - __attribute__((pure)); /***********************************************************//** Looks for a prime number slightly greater than the given argument. 
The prime is chosen so that it is not near any power of 2. @@ -135,6 +117,29 @@ ut_find_prime( ulint n) /*!< in: positive number > 100 */ __attribute__((const)); +#endif /* !UNIV_INNOCHECKSUM */ + +/*************************************************************//** +Folds a pair of ulints. +@return folded value */ +UNIV_INLINE +ulint +ut_fold_ulint_pair( +/*===============*/ + ulint n1, /*!< in: ulint */ + ulint n2) /*!< in: ulint */ + __attribute__((const)); +/*************************************************************//** +Folds a binary string. +@return folded value */ +UNIV_INLINE +ulint +ut_fold_binary( +/*===========*/ + const byte* str, /*!< in: string of bytes */ + ulint len) /*!< in: length */ + __attribute__((pure)); + #ifndef UNIV_NONINL #include "ut0rnd.ic" diff --git a/storage/innobase/include/ut0rnd.ic b/storage/innobase/include/ut0rnd.ic index 795b8ab7a85..024c59e553b 100644 --- a/storage/innobase/include/ut0rnd.ic +++ b/storage/innobase/include/ut0rnd.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -25,6 +25,9 @@ Created 5/30/1994 Heikki Tuuri #define UT_HASH_RANDOM_MASK 1463735687 #define UT_HASH_RANDOM_MASK2 1653893711 + +#ifndef UNIV_INNOCHECKSUM + #define UT_RND1 151117737 #define UT_RND2 119785373 #define UT_RND3 85689495 @@ -156,20 +159,6 @@ ut_hash_ulint( } /*************************************************************//** -Folds a pair of ulints. -@return folded value */ -UNIV_INLINE -ulint -ut_fold_ulint_pair( -/*===============*/ - ulint n1, /*!< in: ulint */ - ulint n2) /*!< in: ulint */ -{ - return(((((n1 ^ n2 ^ UT_HASH_RANDOM_MASK2) << 8) + n1) - ^ UT_HASH_RANDOM_MASK) + n2); -} - -/*************************************************************//** Folds a 64-bit integer. @return folded value */ UNIV_INLINE @@ -203,6 +192,22 @@ ut_fold_string( return(fold); } +#endif /* !UNIV_INNOCHECKSUM */ + +/*************************************************************//** +Folds a pair of ulints. +@return folded value */ +UNIV_INLINE +ulint +ut_fold_ulint_pair( +/*===============*/ + ulint n1, /*!< in: ulint */ + ulint n2) /*!< in: ulint */ +{ + return(((((n1 ^ n2 ^ UT_HASH_RANDOM_MASK2) << 8) + n1) + ^ UT_HASH_RANDOM_MASK) + n2); +} + /*************************************************************//** Folds a binary string. 
@return folded value */ @@ -213,15 +218,37 @@ ut_fold_binary( const byte* str, /*!< in: string of bytes */ ulint len) /*!< in: length */ { - const byte* str_end = str + len; ulint fold = 0; + const byte* str_end = str + (len & 0xFFFFFFF8); ut_ad(str || !len); while (str < str_end) { - fold = ut_fold_ulint_pair(fold, (ulint)(*str)); + fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); + fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); + fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); + fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); + fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); + fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); + fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); + fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); + } - str++; + switch (len & 0x7) { + case 7: + fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); + case 6: + fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); + case 5: + fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); + case 4: + fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); + case 3: + fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); + case 2: + fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); + case 1: + fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); } return(fold); diff --git a/storage/innobase/include/ut0sort.h b/storage/innobase/include/ut0sort.h index 5c6647dda9e..75648b5c317 100644 --- a/storage/innobase/include/ut0sort.h +++ b/storage/innobase/include/ut0sort.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h index 47ab6eb9b74..35b8a580e68 100644 --- a/storage/innobase/include/ut0ut.h +++ b/storage/innobase/include/ut0ut.h @@ -1,13 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. -Copyright (c) 2009, Sun Microsystems, Inc. - -Portions of this file contain modifications contributed and copyrighted by -Sun Microsystems, Inc. Those modifications are gratefully acknowledged and -are described briefly in the InnoDB documentation. The contributions by -Sun Microsystems are incorporated with their permission, and subject to the -conditions contained in the file COPYING.Sun_Microsystems. +Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -18,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -35,6 +28,8 @@ Created 1/20/1994 Heikki Tuuri #include "univ.i" +#ifndef UNIV_INNOCHECKSUM + #include "db0err.h" #ifndef UNIV_HOTBACKUP @@ -46,6 +41,8 @@ Created 1/20/1994 Heikki Tuuri #include <ctype.h> #endif +#include <stdarg.h> /* for va_list */ + /** Index name prefix in fast index creation */ #define TEMP_INDEX_PREFIX '\377' /** Index name prefix in fast index creation, as a string constant */ @@ -55,27 +52,32 @@ Created 1/20/1994 Heikki Tuuri typedef time_t ib_time_t; #ifndef UNIV_HOTBACKUP -#if defined(HAVE_PAUSE_INSTRUCTION) +# if defined(HAVE_PAUSE_INSTRUCTION) /* According to the gcc info page, asm volatile means that the instruction has important side-effects and must not be removed. Also asm volatile may trigger a memory barrier (spilling all registers to memory). */ -# define UT_RELAX_CPU() __asm__ __volatile__ ("pause") -#elif defined(HAVE_FAKE_PAUSE_INSTRUCTION) +# ifdef __SUNPRO_CC +# define UT_RELAX_CPU() asm ("pause" ) +# else +# define UT_RELAX_CPU() __asm__ __volatile__ ("pause") +# endif /* __SUNPRO_CC */ + +# elif defined(HAVE_FAKE_PAUSE_INSTRUCTION) # define UT_RELAX_CPU() __asm__ __volatile__ ("rep; nop") -#elif defined(HAVE_WINDOWS_ATOMICS) - /* In the Win32 API, the x86 PAUSE instruction is executed by calling - the YieldProcessor macro defined in WinNT.h. It is a CPU architecture- - independent way by using YieldProcessor. 
*/ -# define UT_RELAX_CPU() YieldProcessor() -#elif defined(HAVE_ATOMIC_BUILTINS) +# elif defined(HAVE_ATOMIC_BUILTINS) # define UT_RELAX_CPU() do { \ volatile lint volatile_var; \ os_compare_and_swap_lint(&volatile_var, 0, 1); \ } while (0) -#else +# elif defined(HAVE_WINDOWS_ATOMICS) + /* In the Win32 API, the x86 PAUSE instruction is executed by calling + the YieldProcessor macro defined in WinNT.h. It is a CPU architecture- + independent way by using YieldProcessor. */ +# define UT_RELAX_CPU() YieldProcessor() +# else # define UT_RELAX_CPU() ((void)0) /* avoid warning for an empty statement */ -#endif +# endif /*********************************************************************//** Delays execution for at most max_wait_us microseconds or returns earlier @@ -94,16 +96,9 @@ do { \ } while (0) #endif /* !UNIV_HOTBACKUP */ -/********************************************************//** -Gets the high 32 bits in a ulint. That is makes a shift >> 32, -but since there seem to be compiler bugs in both gcc and Visual C++, -we do this by a special conversion. -@return a >> 32 */ -UNIV_INTERN -ulint -ut_get_high32( -/*==========*/ - ulint a); /*!< in: ulint */ +template <class T> T ut_min(T a, T b) { return(a < b ? a : b); } +template <class T> T ut_max(T a, T b) { return(a > b ? a : b); } + /******************************************************//** Calculates the minimum of two ulints. @return minimum */ @@ -261,6 +256,16 @@ ut_time_ms(void); #endif /* !UNIV_HOTBACKUP */ /**********************************************************//** +Returns the number of milliseconds since some epoch. The +value may wrap around. It should only be used for heuristic +purposes. +@return ms since epoch */ +UNIV_INTERN +ulint +ut_time_ms(void); +/*============*/ + +/**********************************************************//** Returns the difference of two times in seconds. 
@return time2 - time1 expressed in seconds */ UNIV_INTERN @@ -269,6 +274,9 @@ ut_difftime( /*========*/ ib_time_t time2, /*!< in: time */ ib_time_t time1); /*!< in: time */ + +#endif /* !UNIV_INNOCHECKSUM */ + /**********************************************************//** Prints a timestamp to a file. */ UNIV_INTERN @@ -277,6 +285,9 @@ ut_print_timestamp( /*===============*/ FILE* file) /*!< in: file where to print */ UNIV_COLD __attribute__((nonnull)); + +#ifndef UNIV_INNOCHECKSUM + /**********************************************************//** Sprintfs a timestamp to a buffer, 13..14 chars plus terminating NUL. */ UNIV_INTERN @@ -379,6 +390,22 @@ ut_copy_file( #ifdef __WIN__ /**********************************************************************//** +A substitute for vsnprintf(3), formatted output conversion into +a limited buffer. Note: this function DOES NOT return the number of +characters that would have been printed if the buffer was unlimited because +VC's _vsnprintf() returns -1 in this case and we would need to call +_vscprintf() in addition to estimate that but we would need another copy +of "ap" for that and VC does not provide va_copy(). */ +UNIV_INTERN +void +ut_vsnprintf( +/*=========*/ + char* str, /*!< out: string */ + size_t size, /*!< in: str size */ + const char* fmt, /*!< in: format */ + va_list ap); /*!< in: format values */ + +/**********************************************************************//** A substitute for snprintf(3), formatted output conversion into a limited buffer. @return number of characters that would have been printed if the size @@ -393,6 +420,15 @@ ut_snprintf( ...); /*!< in: format values */ #else /**********************************************************************//** +A wrapper for vsnprintf(3), formatted output conversion into +a limited buffer. 
Note: this function DOES NOT return the number of +characters that would have been printed if the buffer was unlimited because +VC's _vsnprintf() returns -1 in this case and we would need to call +_vscprintf() in addition to estimate that but we would need another copy +of "ap" for that and VC does not provide va_copy(). */ +# define ut_vsnprintf(buf, size, fmt, ap) \ + ((void) vsnprintf(buf, size, fmt, ap)) +/**********************************************************************//** A wrapper for snprintf(3), formatted output conversion into a limited buffer. */ # define ut_snprintf snprintf @@ -408,9 +444,23 @@ ut_strerr( /*======*/ enum db_err num); /*!< in: error number */ +/**************************************************************** +Sort function for ulint arrays. */ +UNIV_INTERN +void +ut_ulint_sort( +/*==========*/ + ulint* arr, /*!< in/out: array to sort */ + ulint* aux_arr, /*!< in/out: aux array to use in sort */ + ulint low, /*!< in: lower bound */ + ulint high) /*!< in: upper bound */ + __attribute__((nonnull)); + #ifndef UNIV_NONINL #include "ut0ut.ic" #endif +#endif /* !UNIV_INNOCHECKSUM */ + #endif diff --git a/storage/innobase/include/ut0ut.ic b/storage/innobase/include/ut0ut.ic index 6f55c7e410e..4e0f76e1957 100644 --- a/storage/innobase/include/ut0ut.ic +++ b/storage/innobase/include/ut0ut.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ diff --git a/storage/innobase/include/ut0vec.h b/storage/innobase/include/ut0vec.h index 0f8b955b098..f2a5aba8116 100644 --- a/storage/innobase/include/ut0vec.h +++ b/storage/innobase/include/ut0vec.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -29,59 +29,116 @@ Created 4/6/2006 Osku Salerma #include "univ.i" #include "mem0mem.h" -/** An automatically resizing vector data type. 
*/ +typedef struct ib_alloc_struct ib_alloc_t; typedef struct ib_vector_struct ib_vector_t; -/* An automatically resizing vector datatype with the following properties: +typedef void* (*ib_mem_alloc_t)( + /* out: Pointer to allocated memory */ + ib_alloc_t* allocator, /* in: Pointer to allocator instance */ + ulint size); /* in: Number of bytes to allocate */ + +typedef void (*ib_mem_free_t)( + ib_alloc_t* allocator, /* in: Pointer to allocator instance */ + void* ptr); /* in: Memory to free */ - -Contains void* items. +typedef void* (*ib_mem_resize_t)( + /* out: Pointer to resized memory */ + ib_alloc_t* allocator, /* in: Pointer to allocator */ + void* ptr, /* in: Memory to resize */ + ulint old_size, /* in: Old memory size in bytes */ + ulint new_size); /* in: New size in bytes */ - -The items are owned by the caller. +typedef int (*ib_compare_t)(const void*, const void*); - -All memory allocation is done through a heap owned by the caller, who is - responsible for freeing it when done with the vector. +/* An automatically resizing vector datatype with the following properties: - -When the vector is resized, the old memory area is left allocated since it - uses the same heap as the new memory area, so this is best used for - relatively small or short-lived uses. + -All memory allocation is done through an allocator, which is responsible for +freeing it when done with the vector. */ -/****************************************************************//** -Create a new vector with the given initial size. -@return vector */ -UNIV_INTERN +/* This is useful shorthand for elements of type void* */ +#define ib_vector_getp(v, n) (*(void**) ib_vector_get(v, n)) +#define ib_vector_getp_const(v, n) (*(void**) ib_vector_get_const(v, n)) + +#define ib_vector_allocator(v) (v->allocator) + +/******************************************************************** +Create a new vector with the given initial size. 
*/ + ib_vector_t* ib_vector_create( /*=============*/ - mem_heap_t* heap, /*!< in: heap */ - ulint size); /*!< in: initial size */ + /* out: vector */ + ib_alloc_t* alloc, /* in: Allocator */ + /* in: size of the data item */ + ulint sizeof_value, + ulint size); /* in: initial size */ -/****************************************************************//** -Push a new element to the vector, increasing its size if necessary. */ -UNIV_INTERN +/******************************************************************** +Destroy the vector. Make sure the vector owns the allocator, e.g., +the heap in the the heap allocator. */ +UNIV_INLINE void +ib_vector_free( +/*===========*/ + ib_vector_t* vec); /* in/out: vector */ + +/******************************************************************** +Push a new element to the vector, increasing its size if necessary, +if elem is not NULL then elem is copied to the vector.*/ +UNIV_INLINE +void* ib_vector_push( /*===========*/ + /* out: pointer the "new" element */ + ib_vector_t* vec, /* in/out: vector */ + const void* elem); /* in: data element */ + +/******************************************************************** +Pop the last element from the vector.*/ +UNIV_INLINE +void* +ib_vector_pop( +/*==========*/ + /* out: pointer to the "new" element */ + ib_vector_t* vec); /* in/out: vector */ + +/*******************************************************************//** +Remove an element to the vector +@return pointer to the "removed" element */ +UNIV_INLINE +void* +ib_vector_remove( +/*=============*/ ib_vector_t* vec, /*!< in: vector */ - void* elem); /*!< in: data element */ + const void* elem); /*!< in: value to remove */ -/****************************************************************//** -Get the number of elements in the vector. -@return number of elements in vector */ +/******************************************************************** +Get the number of elements in the vector. 
*/ UNIV_INLINE ulint ib_vector_size( /*===========*/ - const ib_vector_t* vec); /*!< in: vector */ + /* out: number of elements in vector */ + const ib_vector_t* vec); /* in: vector */ -/****************************************************************//** +/******************************************************************** +Increase the size of the vector. */ + +void +ib_vector_resize( +/*=============*/ + /* out: number of elements in vector */ + ib_vector_t* vec); /* in/out: vector */ + +/******************************************************************** Test whether a vector is empty or not. -@return TRUE if empty */ +@return TRUE if empty */ UNIV_INLINE ibool ib_vector_is_empty( /*===============*/ - const ib_vector_t* vec); /*!< in: vector */ + const ib_vector_t* vec); /*!< in: vector */ /****************************************************************//** Get the n'th element. @@ -93,6 +150,15 @@ ib_vector_get( ib_vector_t* vec, /*!< in: vector */ ulint n); /*!< in: element index to get */ +/******************************************************************** +Const version of the get n'th element. +@return n'th element */ +UNIV_INLINE +const void* +ib_vector_get_const( +/*================*/ + const ib_vector_t* vec, /* in: vector */ + ulint n); /* in: element index to get */ /****************************************************************//** Get last element. The vector must not be empty. @return last element */ @@ -101,7 +167,6 @@ void* ib_vector_get_last( /*===============*/ ib_vector_t* vec); /*!< in: vector */ - /****************************************************************//** Set the n'th element. */ UNIV_INLINE @@ -112,33 +177,161 @@ ib_vector_set( ulint n, /*!< in: element index to set */ void* elem); /*!< in: data element */ -/****************************************************************//** -Remove the last element from the vector. 
*/ +/******************************************************************** +Reset the vector size to 0 elements. */ +UNIV_INLINE +void +ib_vector_reset( +/*============*/ + ib_vector_t* vec); /* in/out: vector */ + +/******************************************************************** +Get the last element of the vector. */ UNIV_INLINE void* -ib_vector_pop( -/*==========*/ - ib_vector_t* vec); /*!< in: vector */ +ib_vector_last( +/*===========*/ + /* out: pointer to last element */ + ib_vector_t* vec); /* in/out: vector */ -/****************************************************************//** -Free the underlying heap of the vector. Note that vec is invalid -after this call. */ +/******************************************************************** +Get the last element of the vector. */ +UNIV_INLINE +const void* +ib_vector_last_const( +/*=================*/ + /* out: pointer to last element */ + const ib_vector_t* vec); /* in: vector */ + +/******************************************************************** +Sort the vector elements. */ UNIV_INLINE void -ib_vector_free( +ib_vector_sort( +/*===========*/ + ib_vector_t* vec, /* in/out: vector */ + ib_compare_t compare); /* in: the comparator to use for sort */ + +/******************************************************************** +The default ib_vector_t heap free. Does nothing. */ +UNIV_INLINE +void +ib_heap_free( +/*=========*/ + ib_alloc_t* allocator, /* in: allocator */ + void* ptr); /* in: size in bytes */ + +/******************************************************************** +The default ib_vector_t heap malloc. Uses mem_heap_alloc(). */ +UNIV_INLINE +void* +ib_heap_malloc( +/*===========*/ + /* out: pointer to allocated memory */ + ib_alloc_t* allocator, /* in: allocator */ + ulint size); /* in: size in bytes */ + +/******************************************************************** +The default ib_vector_t heap resize. 
Since we can't resize the heap +we have to copy the elements from the old ptr to the new ptr. +Uses mem_heap_alloc(). */ +UNIV_INLINE +void* +ib_heap_resize( /*===========*/ - ib_vector_t* vec); /*!< in,own: vector */ + /* out: pointer to reallocated + memory */ + ib_alloc_t* allocator, /* in: allocator */ + void* old_ptr, /* in: pointer to memory */ + ulint old_size, /* in: old size in bytes */ + ulint new_size); /* in: new size in bytes */ -/** An automatically resizing vector data type. */ +/******************************************************************** +Create a heap allocator that uses the passed in heap. */ +UNIV_INLINE +ib_alloc_t* +ib_heap_allocator_create( +/*=====================*/ + /* out: heap allocator instance */ + mem_heap_t* heap); /* in: heap to use */ + +/******************************************************************** +Free a heap allocator. */ +UNIV_INLINE +void +ib_heap_allocator_free( +/*===================*/ + ib_alloc_t* ib_ut_alloc); /* in: alloc instace to free */ + +/******************************************************************** +Wrapper for ut_free(). */ +UNIV_INLINE +void +ib_ut_free( +/*=======*/ + ib_alloc_t* allocator, /* in: allocator */ + void* ptr); /* in: size in bytes */ + +/******************************************************************** +Wrapper for ut_malloc(). */ +UNIV_INLINE +void* +ib_ut_malloc( +/*=========*/ + /* out: pointer to allocated memory */ + ib_alloc_t* allocator, /* in: allocator */ + ulint size); /* in: size in bytes */ + +/******************************************************************** +Wrapper for ut_realloc(). 
*/ +UNIV_INLINE +void* +ib_ut_resize( +/*=========*/ + /* out: pointer to reallocated + memory */ + ib_alloc_t* allocator, /* in: allocator */ + void* old_ptr, /* in: pointer to memory */ + ulint old_size, /* in: old size in bytes */ + ulint new_size); /* in: new size in bytes */ + +/******************************************************************** +Create a heap allocator that uses the passed in heap. */ +UNIV_INLINE +ib_alloc_t* +ib_ut_allocator_create(void); +/*=========================*/ + +/******************************************************************** +Create a heap allocator that uses the passed in heap. */ +UNIV_INLINE +void +ib_ut_allocator_free( +/*=================*/ + ib_alloc_t* ib_ut_alloc); /* in: alloc instace to free */ + +/* Allocator used by ib_vector_t. */ +struct ib_alloc_struct { + ib_mem_alloc_t mem_malloc; /* For allocating memory */ + ib_mem_free_t mem_release; /* For freeing memory */ + ib_mem_resize_t mem_resize; /* For resizing memory */ + void* arg; /* Currently if not NULL then it + points to the heap instance */ +}; + +/* See comment at beginning of file. 
*/ struct ib_vector_struct { - mem_heap_t* heap; /*!< heap */ - void** data; /*!< data elements */ - ulint used; /*!< number of elements currently used */ - ulint total; /*!< number of elements allocated */ + ib_alloc_t* allocator; /* Allocator, because one size + doesn't fit all */ + void* data; /* data elements */ + ulint used; /* number of elements currently used */ + ulint total; /* number of elements allocated */ + /* Size of a data item */ + ulint sizeof_value; }; #ifndef UNIV_NONINL #include "ut0vec.ic" #endif -#endif +#endif /* IB_VECTOR_H */ diff --git a/storage/innobase/include/ut0vec.ic b/storage/innobase/include/ut0vec.ic index 34c858868ce..1255caee2d9 100644 --- a/storage/innobase/include/ut0vec.ic +++ b/storage/innobase/include/ut0vec.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -23,21 +23,169 @@ A vector of pointers to data items Created 4/6/2006 Osku Salerma ************************************************************************/ -/****************************************************************//** -Get number of elements in vector. 
-@return number of elements in vector */ +#define IB_VEC_OFFSET(v, i) (vec->sizeof_value * i) + +/******************************************************************** +The default ib_vector_t heap malloc. Uses mem_heap_alloc(). */ +UNIV_INLINE +void* +ib_heap_malloc( +/*===========*/ + ib_alloc_t* allocator, /* in: allocator */ + ulint size) /* in: size in bytes */ +{ + mem_heap_t* heap = (mem_heap_t*) allocator->arg; + + return(mem_heap_alloc(heap, size)); +} + +/******************************************************************** +The default ib_vector_t heap free. Does nothing. */ +UNIV_INLINE +void +ib_heap_free( +/*=========*/ + ib_alloc_t* allocator UNIV_UNUSED, /* in: allocator */ + void* ptr UNIV_UNUSED) /* in: size in bytes */ +{ + /* We can't free individual elements. */ +} + +/******************************************************************** +The default ib_vector_t heap resize. Since we can't resize the heap +we have to copy the elements from the old ptr to the new ptr. +Uses mem_heap_alloc(). */ +UNIV_INLINE +void* +ib_heap_resize( +/*===========*/ + ib_alloc_t* allocator, /* in: allocator */ + void* old_ptr, /* in: pointer to memory */ + ulint old_size, /* in: old size in bytes */ + ulint new_size) /* in: new size in bytes */ +{ + void* new_ptr; + mem_heap_t* heap = (mem_heap_t*) allocator->arg; + + new_ptr = mem_heap_alloc(heap, new_size); + memcpy(new_ptr, old_ptr, old_size); + + return(new_ptr); +} + +/******************************************************************** +Create a heap allocator that uses the passed in heap. 
*/ +UNIV_INLINE +ib_alloc_t* +ib_heap_allocator_create( +/*=====================*/ + mem_heap_t* heap) /* in: heap to use */ +{ + ib_alloc_t* heap_alloc; + + heap_alloc = (ib_alloc_t*) mem_heap_alloc(heap, sizeof(*heap_alloc)); + + heap_alloc->arg = heap; + heap_alloc->mem_release = ib_heap_free; + heap_alloc->mem_malloc = ib_heap_malloc; + heap_alloc->mem_resize = ib_heap_resize; + + return(heap_alloc); +} + +/******************************************************************** +Free a heap allocator. */ +UNIV_INLINE +void +ib_heap_allocator_free( +/*===================*/ + ib_alloc_t* ib_ut_alloc) /* in: alloc instace to free */ +{ + mem_heap_free((mem_heap_t*) ib_ut_alloc->arg); +} + +/******************************************************************** +Wrapper around ut_malloc(). */ +UNIV_INLINE +void* +ib_ut_malloc( +/*=========*/ + ib_alloc_t* allocator UNIV_UNUSED, /* in: allocator */ + ulint size) /* in: size in bytes */ +{ + return(ut_malloc(size)); +} + +/******************************************************************** +Wrapper around ut_free(). */ +UNIV_INLINE +void +ib_ut_free( +/*=======*/ + ib_alloc_t* allocator UNIV_UNUSED, /* in: allocator */ + void* ptr) /* in: size in bytes */ +{ + ut_free(ptr); +} + +/******************************************************************** +Wrapper aroung ut_realloc(). */ +UNIV_INLINE +void* +ib_ut_resize( +/*=========*/ + ib_alloc_t* allocator UNIV_UNUSED, /* in: allocator */ + void* old_ptr, /* in: pointer to memory */ + ulint old_size UNIV_UNUSED,/* in: old size in bytes */ + ulint new_size) /* in: new size in bytes */ +{ + return(ut_realloc(old_ptr, new_size)); +} + +/******************************************************************** +Create a ut allocator. 
*/ +UNIV_INLINE +ib_alloc_t* +ib_ut_allocator_create(void) +/*========================*/ +{ + ib_alloc_t* ib_ut_alloc; + + ib_ut_alloc = (ib_alloc_t*) ut_malloc(sizeof(*ib_ut_alloc)); + + ib_ut_alloc->arg = NULL; + ib_ut_alloc->mem_release = ib_ut_free; + ib_ut_alloc->mem_malloc = ib_ut_malloc; + ib_ut_alloc->mem_resize = ib_ut_resize; + + return(ib_ut_alloc); +} + +/******************************************************************** +Free a ut allocator. */ +UNIV_INLINE +void +ib_ut_allocator_free( +/*=================*/ + ib_alloc_t* ib_ut_alloc) /* in: alloc instace to free */ +{ + ut_free(ib_ut_alloc); +} + +/******************************************************************** +Get number of elements in vector. */ UNIV_INLINE ulint ib_vector_size( /*===========*/ - const ib_vector_t* vec) /*!< in: vector */ + /* out: number of elements in vector*/ + const ib_vector_t* vec) /* in: vector */ { return(vec->used); } /****************************************************************//** -Get n'th element. -@return n'th element */ +Get n'th element. */ UNIV_INLINE void* ib_vector_get( @@ -47,9 +195,23 @@ ib_vector_get( { ut_a(n < vec->used); - return(vec->data[n]); + return((byte*) vec->data + IB_VEC_OFFSET(vec, n)); } +/******************************************************************** +Const version of the get n'th element. +@return n'th element */ +UNIV_INLINE +const void* +ib_vector_get_const( +/*================*/ + const ib_vector_t* vec, /* in: vector */ + ulint n) /* in: element index to get */ +{ + ut_a(n < vec->used); + + return((byte*) vec->data + IB_VEC_OFFSET(vec, n)); +} /****************************************************************//** Get last element. The vector must not be empty. 
@return last element */ @@ -61,7 +223,7 @@ ib_vector_get_last( { ut_a(vec->used > 0); - return(vec->data[vec->used - 1]); + return((byte*) ib_vector_get(vec, vec->used - 1)); } /****************************************************************//** @@ -74,9 +236,52 @@ ib_vector_set( ulint n, /*!< in: element index to set */ void* elem) /*!< in: data element */ { + void* slot; + ut_a(n < vec->used); - vec->data[n] = elem; + slot = ((byte*) vec->data + IB_VEC_OFFSET(vec, n)); + memcpy(slot, elem, vec->sizeof_value); +} + +/******************************************************************** +Reset the vector size to 0 elements. */ +UNIV_INLINE +void +ib_vector_reset( +/*============*/ + /* out: void */ + ib_vector_t* vec) /* in: vector */ +{ + vec->used = 0; +} + +/******************************************************************** +Get the last element of the vector. */ +UNIV_INLINE +void* +ib_vector_last( +/*===========*/ + /* out: void */ + ib_vector_t* vec) /* in: vector */ +{ + ut_a(ib_vector_size(vec) > 0); + + return(ib_vector_get(vec, ib_vector_size(vec) - 1)); +} + +/******************************************************************** +Get the last element of the vector. 
*/ +UNIV_INLINE +const void* +ib_vector_last_const( +/*=================*/ + /* out: void */ + const ib_vector_t* vec) /* in: vector */ +{ + ut_a(ib_vector_size(vec) > 0); + + return(ib_vector_get_const(vec, ib_vector_size(vec) - 1)); } /****************************************************************//** @@ -86,35 +291,129 @@ UNIV_INLINE void* ib_vector_pop( /*==========*/ - ib_vector_t* vec) /*!< in/out: vector */ + /* out: pointer to element */ + ib_vector_t* vec) /* in: vector */ { - void* elem; + void* elem; ut_a(vec->used > 0); - --vec->used; - elem = vec->data[vec->used]; - ut_d(vec->data[vec->used] = NULL); - UNIV_MEM_INVALID(&vec->data[vec->used], sizeof(*vec->data)); + elem = ib_vector_last(vec); + --vec->used; return(elem); } -/****************************************************************//** -Free the underlying heap of the vector. Note that vec is invalid -after this call. */ +/******************************************************************** +Append an element to the vector, if elem != NULL then copy the data +from elem.*/ +UNIV_INLINE +void* +ib_vector_push( +/*===========*/ + /* out: pointer to the "new" element */ + ib_vector_t* vec, /* in: vector */ + const void* elem) /* in: element to add (can be NULL) */ +{ + void* last; + + if (vec->used >= vec->total) { + ib_vector_resize(vec); + } + + last = (byte*) vec->data + IB_VEC_OFFSET(vec, vec->used); + +#ifdef UNIV_DEBUG + memset(last, 0, vec->sizeof_value); +#endif + + if (elem) { + memcpy(last, elem, vec->sizeof_value); + } + + ++vec->used; + + return(last); +} + +/*******************************************************************//** +Remove an element to the vector +@return pointer to the "removed" element */ +UNIV_INLINE +void* +ib_vector_remove( +/*=============*/ + ib_vector_t* vec, /*!< in: vector */ + const void* elem) /*!< in: value to remove */ +{ + void* current; + void* next; + ulint i; + + for (i = 0; i < vec->used; i++) { + current = ib_vector_get(vec, i); + + if (*(void**) 
current == elem) { + if (i == vec->used - 1) { + return(ib_vector_pop(vec)); + } + + next = ib_vector_get(vec, i + 1); + memcpy(current, next, vec->sizeof_value + * (vec->used - i - 1)); + } + } + + --vec->used; + + return(current); +} + +/******************************************************************** +Sort the vector elements. */ +UNIV_INLINE +void +ib_vector_sort( +/*===========*/ + /* out: void */ + ib_vector_t* vec, /* in: vector */ + ib_compare_t compare)/* in: the comparator to use for sort */ +{ + qsort(vec->data, vec->used, vec->sizeof_value, compare); +} + +/******************************************************************** +Destroy the vector. Make sure the vector owns the allocator, e.g., +the heap in the the heap allocator. */ UNIV_INLINE void ib_vector_free( /*===========*/ - ib_vector_t* vec) /*!< in, own: vector */ + ib_vector_t* vec) /* in, own: vector */ { - mem_heap_free(vec->heap); + /* Currently we only support two types of allocators, heap + and ut_malloc(), when the heap is freed all the elements are + freed too. With ut allocator, we need to free the elements, + the vector instance and the allocator separately. */ + + /* Only the heap allocator uses the arg field. */ + if (vec->allocator->arg) { + mem_heap_free((mem_heap_t*) vec->allocator->arg); + } else { + ib_alloc_t* allocator; + + allocator = vec->allocator; + + allocator->mem_release(allocator, vec->data); + allocator->mem_release(allocator, vec); + + ib_ut_allocator_free(allocator); + } } -/****************************************************************//** +/******************************************************************** Test whether a vector is empty or not. 
-@return TRUE if empty */ +@return TRUE if empty */ UNIV_INLINE ibool ib_vector_is_empty( diff --git a/storage/innobase/include/ut0wqueue.h b/storage/innobase/include/ut0wqueue.h index 2ec0f16ab05..ed4e65e4dc6 100644 --- a/storage/innobase/include/ut0wqueue.h +++ b/storage/innobase/include/ut0wqueue.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2006, 2009, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -11,8 +11,8 @@ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA *****************************************************************************/ @@ -66,6 +66,16 @@ ib_wqueue_add( mem_heap_t* heap); /*!< in: memory heap to use for allocating the list node */ +/******************************************************************** +Check if queue is empty. */ + +ibool +ib_wqueue_is_empty( +/*===============*/ + /* out: TRUE if queue empty + else FALSE */ + const ib_wqueue_t* wq); /* in: work queue */ + /****************************************************************//** Wait for a work item to appear in the queue. @return work item */ @@ -75,6 +85,16 @@ ib_wqueue_wait( /*===========*/ ib_wqueue_t* wq); /*!< in: work queue */ +/******************************************************************** +Wait for a work item to appear in the queue for specified time. 
*/ + +void* +ib_wqueue_timedwait( +/*================*/ + /* out: work item or NULL on timeout*/ + ib_wqueue_t* wq, /* in: work queue */ + ib_time_t wait_in_usecs); /* in: wait time in micro seconds */ + /* Work queue. */ struct ib_wqueue_struct { mutex_t mutex; /*!< mutex protecting everything */ |