/* Copyright (C) 2008 Sun AB and Michael Widenius

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
   MA 02110-1335 USA */

/*
  Functions to maintain live statistics for Maria transactional tables
  and versioning for not transactional tables

  See WL#3138; Maria - fast "SELECT COUNT(*) FROM t;" and "CHECKSUM TABLE t"
  for details about live number of rows and live checksums

  TODO
  - Allocate MARIA_USED_TABLES and MARIA_STATE_HISTORY from a global pool
    (to avoid calls to malloc())
  - In _ma_trnman_end_trans_hook(), don't call
    _ma_remove_not_visible_states() every time. One could for example call
    it if there has been more than 10 ended transactions since last time it
    was called.
*/

#include "maria_def.h"
#include "trnman.h"
#include "ma_trnman.h"
#include "ma_blockrec.h"

/**
  @brief Setup initial start-of-transaction state for a table

  @fn     _ma_setup_live_state
  @param  info          Maria handler

  @notes
    Makes sure that trn->used_tables has an entry (holding a start state and
    a live state) for the table behind 'info', and points info->state_start,
    info->state and info->used_tables at that entry.

    If the transaction has no entry for the share yet, one is allocated,
    linked first in trn->used_tables and initialized from the first element
    of share->state_history whose trid is smaller than trn->trid.

  @TODO
    Change trn->table_list to a hash and share->state_history to a binary tree

  @return
  @retval 0  ok
  @retval 1  error (maria_create_trn_hook() failed or out of memory)
*/

my_bool _ma_setup_live_state(MARIA_HA *info)
{
  TRN *trn;
  MARIA_SHARE *share= info->s;
  MARIA_USED_TABLES *entry;
  DBUG_ENTER("_ma_setup_live_state");
  DBUG_PRINT("enter", ("info: %p", info));

  DBUG_ASSERT(share->lock_key_trees);

  if (maria_create_trn_hook(info))
    DBUG_RETURN(1);

  trn= info->trn;

  /* Search for an entry that this transaction already has for the share */
  entry= (MARIA_USED_TABLES*) trn->used_tables;
  while (entry && entry->share != share)
    entry= entry->next;

  if (!entry)
  {
    MARIA_STATE_HISTORY *visible;

    /* Table was not used before, create new table state entry */
    entry= (MARIA_USED_TABLES*) my_malloc(sizeof(*entry),
                                          MYF(MY_WME | MY_ZEROFILL));
    if (!entry)
      DBUG_RETURN(1);

    entry->next= trn->used_tables;
    trn->used_tables= entry;
    entry->share= share;

    mysql_mutex_lock(&share->intern_lock);
    share->in_trans++;
    DBUG_PRINT("info", ("share: %p in_trans: %d", share, share->in_trans));

    /*
      We must keep share locked to ensure that we don't access a history
      link that is deleted by concurrently running checkpoint.

      It's enough to compare trids here (instead of calling
      tranman_can_read_from) as history->trid is a commit_trid
    */
    visible= share->state_history;
    while (trn->trid <= visible->trid)
      visible= visible->next;
    mysql_mutex_unlock(&share->intern_lock);

    /*
      The current item can't be deleted as it's the first one visible for
      us, which is why it is read after the lock has been released.
    */
    entry->state_start= entry->state_current= visible->state;
    entry->state_current.changed= entry->state_current.no_transid= 0;

    DBUG_PRINT("info", ("records: %ld", (ulong) entry->state_start.records));
  }

  info->state_start= &entry->state_start;
  info->state= &entry->state_current;
  info->used_tables= entry;
  entry->use_count++;

  /*
    Mark in transaction state if we are not using transid (versioning)
    on rows. If not, then we will in _ma_trnman_end_trans_hook()
    ensure that the state is visible for all at end of transaction
  */
  entry->state_current.no_transid|= !(info->row_flag & ROW_FLAG_TRANSID);

  DBUG_PRINT("exit", ("tables: %p info->state: %p", entry, info->state));
  DBUG_RETURN(0);
}


/**
  @brief Remove states that are not visible by anyone

  @fn   _ma_remove_not_visible_states()
  @param org_history       List to history
  @param all               1 if we should delete the first state if it's
                           visible for all. For the moment this is only
                           used on close() of table.
  @param trnman_is_locked  Set to 1 if we have already a lock on trnman.
                           Passed on to trnman_exists_active_transactions().

  @notes
    The assumption is that items in the history list is ordered by
    commit_trid.

    A state is not visible anymore if there is no new transaction
    that has been started between the commit_trid's of two states

    As long as some states exists, we keep the newest = (last commit)
    state as first state in the history. This is to allow us to just move
    the history from the global list to the share when we open the table.

    NOTE(review): this comment used to state that trnman_is_locked must be
    0 when 'all' is set, because trnman_get_min_trid() takes a lock on
    trnman. _ma_remove_not_visible_states_with_lock() in this file passes
    its 'all' argument together with trnman_is_locked == 1, so verify that
    statement against trnman_get_min_trid() before relying on it.

  @return
  @retval Pointer to new history list (0 if org_history was 0 or if the
          last remaining state was deleted)
*/

MARIA_STATE_HISTORY
*_ma_remove_not_visible_states(MARIA_STATE_HISTORY *org_history,
                               my_bool all,
                               my_bool trnman_is_locked)
{
  TrID newer_trid;
  MARIA_STATE_HISTORY *cur, *following, **link;
  DBUG_ENTER("_ma_remove_not_visible_states");

  if (!org_history)
    DBUG_RETURN(0);                             /* Not versioned table */

  /*
    'newer_trid' is the trid of the closest kept state before 'cur';
    'link' is the next-pointer that the next kept state is stored into.
  */
  newer_trid= org_history->trid;
  link= &org_history->next;
  cur= org_history->next;
  while (cur)
  {
    following= cur->next;
    if (trnman_exists_active_transactions(cur->trid, newer_trid,
                                          trnman_is_locked))
    {
      /* Somebody may still need this state; keep it in the list */
      *link= cur;
      link= &cur->next;
      newer_trid= cur->trid;
    }
    else
    {
      DBUG_PRINT("info", ("removing history->trid: %lu next: %lu",
                          (ulong) cur->trid, (ulong) newer_trid));
      my_free(cur);
    }
    cur= following;
  }
  *link= 0;

  /*
    'link' still pointing at the head's next-pointer means that only one
    state is left. Delete this if it's visible for all.
  */
  if (all && link == &org_history->next &&
      newer_trid < trnman_get_min_trid())
  {
    my_free(org_history);
    org_history= 0;
  }
  DBUG_RETURN(org_history);
}


/**
  @brief Remove not used state history

  @param share          Maria table information
  @param all            1 if we should delete the first state if it's
                        visible for all. For the moment this is only
                        used on close() of table.

  @notes
    share and trnman are not locked.

    We must first lock trnman and then share->intern_lock. This is because
    _ma_trnman_end_trans_hook() has a lock on trnman and then
    takes share->intern_lock.

    trnman is only locked (and later unlocked) if trman_is_inited()
    returns true.
*/

void _ma_remove_not_visible_states_with_lock(MARIA_SHARE *share,
                                             my_bool all)
{
  my_bool trnman_locked= trman_is_inited();

  if (trnman_locked)
    trnman_lock();

  mysql_mutex_lock(&share->intern_lock);
  share->state_history= _ma_remove_not_visible_states(share->state_history,
                                                      all, 1);
  mysql_mutex_unlock(&share->intern_lock);

  if (trnman_locked)
    trnman_unlock();
}


/*
  Free state history information from share->history and reset information
  to current state.
@notes Used after repair/rename/drop as then all rows are visible for everyone */ void _ma_reset_state(MARIA_HA *info) { MARIA_SHARE *share= info->s; MARIA_STATE_HISTORY *history= share->state_history; DBUG_ENTER("_ma_reset_state"); /* Always true if share->now_transactional is set */ if (history && share->have_versioning) { MARIA_STATE_HISTORY *next; DBUG_PRINT("info", ("resetting history")); /* Set the current history to current state */ share->state_history->state= share->state.state; /* Set current table handler to point to new history state */ info->state= info->state_start= &share->state_history->state; for (history= history->next ; history ; history= next) { next= history->next; my_free(history); } share->state_history->next= 0; share->state_history->trid= 0; /* Visible for all */ } DBUG_VOID_RETURN; } /**************************************************************************** The following functions are called by thr_lock() in threaded applications for not transactional tables ****************************************************************************/ /* Create a copy of the current status for the table SYNOPSIS _ma_get_status() param Pointer to Myisam handler concurrent_insert Set to 1 if we are going to do concurrent inserts (THR_WRITE_CONCURRENT_INSERT was used) */ void _ma_get_status(void* param, my_bool concurrent_insert) { MARIA_HA *info=(MARIA_HA*) param; DBUG_ENTER("_ma_get_status"); DBUG_PRINT("info",("key_file: %ld data_file: %ld concurrent_insert: %d", (long) info->s->state.state.key_file_length, (long) info->s->state.state.data_file_length, concurrent_insert)); #ifndef DBUG_OFF if (info->state->key_file_length > info->s->state.state.key_file_length || info->state->data_file_length > info->s->state.state.data_file_length) DBUG_PRINT("warning",("old info: key_file: %ld data_file: %ld", (long) info->state->key_file_length, (long) info->state->data_file_length)); #endif info->state_save= info->s->state.state; info->state= &info->state_save; 
info->state->changed= 0; info->append_insert_at_end= concurrent_insert; DBUG_VOID_RETURN; } void _ma_update_status(void* param) { MARIA_HA *info=(MARIA_HA*) param; /* Because someone may have closed the table we point at, we only update the state if its our own state. This isn't a problem as we are always pointing at our own lock or at a read lock. (This is enforced by thr_multi_lock.c) */ if (info->state == &info->state_save) { MARIA_SHARE *share= info->s; #ifndef DBUG_OFF DBUG_PRINT("info",("updating status: key_file: %ld data_file: %ld", (long) info->state->key_file_length, (long) info->state->data_file_length)); if (info->state->key_file_length < share->state.state.key_file_length || info->state->data_file_length < share->state.state.data_file_length) DBUG_PRINT("warning",("old info: key_file: %ld data_file: %ld", (long) share->state.state.key_file_length, (long) share->state.state.data_file_length)); #endif /* we are going to modify the state without lock's log, this would break recovery if done with a transactional table. */ DBUG_ASSERT(!info->s->base.born_transactional); share->state.state= *info->state; info->state= &share->state.state; #ifdef HAVE_QUERY_CACHE DBUG_PRINT("info", ("invalidator... '%s' (status update)", info->s->data_file_name.str)); DBUG_ASSERT(info->s->chst_invalidator != NULL); (*info->s->chst_invalidator)((const char *)info->s->data_file_name.str); #endif } info->append_insert_at_end= 0; } /* Same as ma_update_status() but take a lock in the table lock, to protect against someone calling ma_get_status() from thr_lock() at the same time. 
*/ void _ma_update_status_with_lock(MARIA_HA *info) { my_bool locked= 0; if (info->state == &info->state_save) { locked= 1; mysql_mutex_lock(&info->s->lock.mutex); } (*info->s->lock.update_status)(info); if (locked) mysql_mutex_unlock(&info->s->lock.mutex); } void _ma_restore_status(void *param) { MARIA_HA *info= (MARIA_HA*) param; info->state= &info->s->state.state; info->append_insert_at_end= 0; } void _ma_copy_status(void* to, void *from) { ((MARIA_HA*) to)->state= &((MARIA_HA*) from)->state_save; } void _ma_reset_update_flag(void *param, my_bool concurrent_insert __attribute__((unused))) { MARIA_HA *info=(MARIA_HA*) param; info->state->changed= 0; } my_bool _ma_start_trans(void* param) { MARIA_HA *info=(MARIA_HA*) param; if (!info->s->lock_key_trees) { info->state= info->state_start; *info->state= info->s->state.state; } return 0; } /** @brief Check if should allow concurrent inserts @implementation Allow concurrent inserts if we don't have a hole in the table or if there is no active write lock and there is active read locks and maria_concurrent_insert == 2. In this last case the new row('s) are inserted at end of file instead of filling up the hole. The last case is to allow one to inserts into a heavily read-used table even if there is holes. 
@notes If there is a an rtree indexes in the table, concurrent inserts are disabled in maria_open() @return @retval 0 ok to use concurrent inserts @retval 1 not ok */ my_bool _ma_check_status(void *param) { MARIA_HA *info=(MARIA_HA*) param; /* The test for w_locks == 1 is here because this thread has already done an external lock (in other words: w_locks == 1 means no other threads has a write lock) */ DBUG_PRINT("info",("dellink: %ld r_locks: %u w_locks: %u", (long) info->s->state.dellink, (uint) info->s->r_locks, (uint) info->s->w_locks)); return (my_bool) !(info->s->state.dellink == HA_OFFSET_ERROR || (maria_concurrent_insert == 2 && info->s->r_locks && info->s->w_locks == 1)); } /** @brief write hook at end of trans to store status for all used table @Notes This function must be called under trnman_lock in trnman_end_trn() because of the following reasons: - After trnman_end_trn() is called, the current transaction will be regarded as committed and all used tables state_history will be visible to other transactions. To do this, we loop over all used tables and create/update a history entries that contains the correct state_history for them. */ my_bool _ma_trnman_end_trans_hook(TRN *trn, my_bool commit, my_bool active_transactions) { my_bool error= 0; MARIA_USED_TABLES *tables, *next; DBUG_ENTER("_ma_trnman_end_trans_hook"); DBUG_PRINT("enter", ("trn: %p used_tables: %p", trn, trn->used_tables)); for (tables= (MARIA_USED_TABLES*) trn->used_tables; tables; tables= next) { MARIA_SHARE *share= tables->share; next= tables->next; if (commit) { MARIA_STATE_HISTORY *history; mysql_mutex_lock(&share->intern_lock); /* We only have to update history state if something changed */ if (tables->state_current.changed) { if (tables->state_current.no_transid) { /* The change was done without using transid on rows (like in bulk insert). In this case this thread is the only one that is using the table and all rows will be visble for all transactions. 
*/ _ma_reset_history(share); } else { if (active_transactions && share->now_transactional && trnman_exists_active_transactions(share->state_history->trid, trn->commit_trid, 1)) { /* There exist transactions that are still using the current share->state_history. Create a new history item for this commit and add it first in the state_history list. This ensures that all history items are stored in the list in decresing trid order. */ if (!(history= my_malloc(sizeof(*history), MYF(MY_WME)))) { /* purecov: begin inspected */ error= 1; mysql_mutex_unlock(&share->intern_lock); my_free(tables); continue; /* purecov: end */ } history->state= share->state_history->state; history->next= share->state_history; share->state_history= history; } else { /* Previous history can't be seen by anyone, reuse old memory */ history= share->state_history; DBUG_PRINT("info", ("removing history->trid: %lu new: %lu", (ulong) history->trid, (ulong) trn->commit_trid)); } history->state.records+= (tables->state_current.records - tables->state_start.records); history->state.checksum+= (tables->state_current.checksum - tables->state_start.checksum); history->trid= trn->commit_trid; share->state.last_change_trn= trn->commit_trid; if (history->next) { /* Remove not visible states */ share->state_history= _ma_remove_not_visible_states(history, 0, 1); } DBUG_PRINT("info", ("share: %p in_trans: %d", share, share->in_trans)); } } share->in_trans--; mysql_mutex_unlock(&share->intern_lock); } else { #ifdef DBUG_ASSERT_EXISTS /* We need to keep share->in_trans correct in the debug library because of the assert in maria_close() */ mysql_mutex_lock(&share->intern_lock); share->in_trans--; mysql_mutex_unlock(&share->intern_lock); #endif } my_free(tables); } trn->used_tables= 0; trn->used_instances= 0; DBUG_RETURN(error); } /** Remove table from trnman_list @notes This is used when we unlock a table from a group of locked tables just before doing a rename or drop table. 
share->internal_lock must be locked when function is called
*/

void _ma_remove_table_from_trnman(MARIA_HA *info)
{
  MARIA_SHARE *share= info->s;
  TRN *trn= info->trn;
  MARIA_USED_TABLES *entry, **link;
  DBUG_ENTER("_ma_remove_table_from_trnman");
  DBUG_PRINT("enter", ("trn: %p used_tables: %p share: %p in_trans: %d",
                       trn, trn->used_tables, share, share->in_trans));

  /* The mutex the caller has to hold is share->intern_lock */
  mysql_mutex_assert_owner(&share->intern_lock);

  /* Nothing is done for a handler that uses the dummy transaction */
  if (trn == &dummy_transaction_object)
    DBUG_VOID_RETURN;

  /*
    First remove share from used_tables: walk the list through the address
    of each next-pointer so that the found element can be unlinked in place.
  */
  link= (MARIA_USED_TABLES**) (char*) &trn->used_tables;
  while ((entry= *link) && entry->share != share)
    link= &entry->next;

  if (entry)
  {
    *link= entry->next;
    share->in_trans--;
    my_free(entry);
  }
  else
  {
    /*
      This can only happens in case of rename of intermediate table as
      part of alter table
    */
    DBUG_PRINT("warning", ("share: %p where not in used_tables_list", share));
  }

  /* Reset trn and remove table from used_instances */
  _ma_reset_trn_for_table(info);

  DBUG_VOID_RETURN;
}


/****************************************************************************
  The following functions are called by thr_lock() in threaded applications
  for transactional tables.
****************************************************************************/ /* Create a copy of the current status for the table SYNOPSIS _ma_get_status() param Pointer to Aria handler concurrent_insert Set to 1 if we are going to do concurrent inserts (THR_WRITE_CONCURRENT_INSERT was used) */ void _ma_block_get_status(void* param, my_bool concurrent_insert) { MARIA_HA *info=(MARIA_HA*) param; DBUG_ENTER("_ma_block_get_status"); DBUG_PRINT("enter", ("concurrent_insert %d", concurrent_insert)); info->row_base_length= info->s->base_length; info->row_flag= info->s->base.default_row_flag; if (concurrent_insert) { DBUG_ASSERT(info->lock.type == TL_WRITE_CONCURRENT_INSERT); info->row_flag|= ROW_FLAG_TRANSID; info->row_base_length+= TRANSID_SIZE; } else { DBUG_ASSERT(info->lock.type != TL_WRITE_CONCURRENT_INSERT); } DBUG_VOID_RETURN; } my_bool _ma_block_start_trans(void* param) { MARIA_HA *info=(MARIA_HA*) param; DBUG_ENTER("_ma_block_start_trans"); if (info->s->lock_key_trees) { /* Assume for now that this doesn't fail (It can only fail in out of memory conditions) TODO: Fix this by having one extra state pre-allocated */ DBUG_RETURN(_ma_setup_live_state(info)); } else { /* We come here in the following cases: - The table is a temporary table - It's a table which is crash safe but not yet versioned, for example a table with fulltext or rtree keys Set the current state to point to save_state so that the block_format code don't count the same record twice. Copy also the current state. 
This may have been wrong if the same file was used several times in the last statement */ info->state= info->state_start; *info->state= info->s->state.state; } /* Info->trn is set if this table is already handled and we are called from maria_versioning() */ if (info->s->base.born_transactional && !info->trn) { /* Assume for now that this doesn't fail (It can only fail in out of memory conditions) */ DBUG_RETURN(maria_create_trn_hook(info) != 0); } DBUG_RETURN(0); } void _ma_block_update_status(void *param __attribute__((unused))) { } void _ma_block_restore_status(void *param __attribute__((unused))) { } /** Check if should allow concurrent inserts @return @retval 0 ok to use concurrent inserts @retval 1 not ok */ my_bool _ma_block_check_status(void *param __attribute__((unused))) { return (my_bool) 0; } /* Get status when transactional but not versioned */ my_bool _ma_block_start_trans_no_versioning(void* param) { MARIA_HA *info=(MARIA_HA*) param; DBUG_ENTER("_ma_block_start_trans_no_versioning"); DBUG_ASSERT(info->s->base.born_transactional && !info->s->lock_key_trees); info->state->changed= 0; /* from _ma_reset_update_flag() */ info->state= info->state_start; *info->state= info->s->state.state; if (!info->trn) { /* Assume for now that this doesn't fail (It can only fail in out of memory conditions) */ DBUG_RETURN(maria_create_trn_hook(info)); } DBUG_RETURN(0); } /** Enable/disable versioning */ void maria_versioning(MARIA_HA *info, my_bool versioning) { MARIA_SHARE *share= info->s; DBUG_ENTER("maria_versioning"); /* For now, this is a hack */ if (share->have_versioning) { enum thr_lock_type save_lock_type; share->lock_key_trees= versioning; /* Set up info->lock.type temporary for _ma_block_get_status() */ save_lock_type= info->lock.type; info->lock.type= versioning ? 
TL_WRITE_CONCURRENT_INSERT : TL_WRITE; _ma_block_get_status((void*) info, versioning); info->lock.type= save_lock_type; if (versioning) info->state= &share->state.common; else info->state= &share->state.state; /* Change global values by default */ info->state_start= info->state; /* Initial values */ } DBUG_VOID_RETURN; } /** Update data_file_length to new length NOTES Only used by block records */ void _ma_set_share_data_file_length(MARIA_SHARE *share, ulonglong new_length) { if (!share->internal_table) mysql_mutex_lock(&share->intern_lock); if (share->state.state.data_file_length < new_length) { share->state.state.data_file_length= new_length; if (new_length >= share->base.max_data_file_length) { /* Give an error on next insert */ share->state.changed|= STATE_DATA_FILE_FULL; } } if (!share->internal_table) mysql_mutex_unlock(&share->intern_lock); } /** Copy state information that where updated while the table was used in not transactional mode */ void _ma_copy_nontrans_state_information(MARIA_HA *info) { info->s->state.state.records= info->state->records; info->s->state.state.checksum= info->state->checksum; } /** Reset history This is only called during repair when we are the only one using the table. 
*/

void _ma_reset_history(MARIA_SHARE *share)
{
  MARIA_STATE_HISTORY *head= share->state_history;
  MARIA_STATE_HISTORY *old, *following;
  DBUG_ENTER("_ma_reset_history");

  /* Make the first element describe the share's current state */
  head->trid= 0;                                /* Visibly by all */
  head->state= share->state.state;

  /* Detach the rest of the list before freeing it */
  old= head->next;
  head->next= 0;

  while (old)
  {
    following= old->next;
    my_free(old);
    old= following;
  }
  DBUG_VOID_RETURN;
}


/****************************************************************************
  Virtual functions to check if row is visible
****************************************************************************/

/**
  Row is always visible
  This is for tables without concurrent insert
*/

my_bool _ma_row_visible_always(MARIA_HA *info __attribute__((unused)))
{
  return 1;
}


/**
  Row visibility for non transactional tables with concurrent insert

  @implementation
    When we got our table lock, we saved the current
    data_file_length. Concurrent inserts always go to the end of the
    file. So we can test if the found key references a new record.

  @return
  @retval 1  the current row position is below the saved data_file_length
  @retval 0  otherwise
*/

my_bool _ma_row_visible_non_transactional_table(MARIA_HA *info)
{
  return info->state->data_file_length > info->cur_row.lastpos;
}


/**
  Row visibility for transactional tables with versioning

  The decision is delegated to trnman_can_read_from(), which is given the
  handler's transaction and the transaction id of the current row.

  @TODO
    Add test if found key was marked deleted and it was deleted by
    us. In that case we should return 0
*/

my_bool _ma_row_visible_transactional_table(MARIA_HA *info)
{
  return trnman_can_read_from(info->trn, info->cur_row.trid);
}