diff options
Diffstat (limited to 'innobase/log/trash/log0trsh.c')
-rw-r--r-- | innobase/log/trash/log0trsh.c | 648 |
1 files changed, 648 insertions, 0 deletions
diff --git a/innobase/log/trash/log0trsh.c b/innobase/log/trash/log0trsh.c new file mode 100644 index 00000000000..7f48118a0d1 --- /dev/null +++ b/innobase/log/trash/log0trsh.c @@ -0,0 +1,648 @@ +/****************************************************** +Recovery + +(c) 1997 Innobase Oy + +Created 9/20/1997 Heikki Tuuri +*******************************************************/ + +#include "log0recv.h" + +#ifdef UNIV_NONINL +#include "log0recv.ic" +#endif + +#include "mem0mem.h" +#include "buf0buf.h" +#include "buf0flu.h" +#include "srv0srv.h" + +/* Size of block reads when the log groups are scanned forward to do +roll-forward */ +#define RECV_SCAN_SIZE (4 * UNIV_PAGE_SIZE) + +/* Size of block reads when the log groups are scanned backwards to synchronize +them */ +#define RECV_BACK_SCAN_SIZE (4 * UNIV_PAGE_SIZE) + +recv_sys_t* recv_sys = NULL; + +recv_recover_page(block->frame, block->space, block->offset); + +/************************************************************ +Creates the recovery system. */ + +void +recv_sys_create(void) +/*=================*/ +{ + ut_a(recv_sys == NULL); + + recv_sys = mem_alloc(sizeof(recv_t)); + + mutex_create(&(recv_sys->mutex)); + + recv_sys->hash = NULL; + recv_sys->heap = NULL; +} + +/************************************************************ +Inits the recovery system for a recovery operation. */ + +void +recv_sys_init(void) +/*===============*/ +{ + recv_sys->hash = hash_create(buf_pool_get_curr_size() / 64); + recv_sys->heap = mem_heap_create_in_buffer(256); +} + +/************************************************************ +Empties the recovery system. */ + +void +recv_sys_empty(void) +/*================*/ +{ + mutex_enter(&(recv_sys->mutex)); + + hash_free(recv_sys->hash); + mem_heap_free(recv_sys->heap); + + recv_sys->hash = NULL; + recv_sys->heap = NULL; + + mutex_exit(&(recv_sys->mutex)); +} + +/*********************************************************** +For recovery purposes copies the log buffer to a group to synchronize log +data. */ +static +void +recv_log_buf_flush( +/*===============*/ + log_group_t* group, /* in: log group */ + dulint start_lsn, /* in: start lsn of the log data in + the log buffer; must be divisible by + OS_FILE_LOG_BLOCK_SIZE */ + dulint end_lsn) /* in: end lsn of the log data in the + log buffer; must be divisible by + OS_FILE_LOG_BLOCK_SIZE */ +{ + ulint len; + + ut_ad(mutex_own(&(log_sys->mutex))); + + len = ut_dulint_minus(end_lsn, start_lsn); + + log_group_write_buf(LOG_RECOVER, group, log_sys->buf, len, start_lsn, + 0); +} + +/*********************************************************** +Compares two buffers containing log segments and determines the highest lsn +where they match, if any. */ +static +dulint +recv_log_bufs_cmp( +/*==============*/ + /* out: if no match found, ut_dulint_zero or + if start_lsn == LOG_START_LSN, returns + LOG_START_LSN; otherwise the highest matching + lsn */ + byte* recv_buf, /* in: buffer containing valid log data */ + byte* buf, /* in: buffer of data from a possibly + incompletely written log group */ + dulint start_lsn, /* in: buffer start lsn, must be divisible + by OS_FILE_LOG_BLOCK_SIZE and must be >= + LOG_START_LSN */ + dulint end_lsn, /* in: buffer end lsn, must be divisible + by OS_FILE_LOG_BLOCK_SIZE */ + dulint recovered_lsn) /* in: recovery succeeded up to this lsn */ +{ + ulint len; + ulint offset; + byte* log_block1; + byte* log_block2; + ulint no; + ulint data_len; + + ut_ad(ut_dulint_cmp(start_lsn, LOG_START_LSN) >= 0); + + if (ut_dulint_cmp(end_lsn, recovered_lsn) > 0) { + end_lsn = ut_dulint_align_up(recovered_lsn, + OS_FILE_LOG_BLOCK_SIZE); + } + + len = ut_dulint_minus(end_lsn, start_lsn); + + if (len == 0) { + + goto no_match; + } + + ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0); + + log_block1 = recv_buf + len; + log_block2 = buf + len; + + for (;;) { + log_block1 -= OS_FILE_LOG_BLOCK_SIZE; + log_block2 -= OS_FILE_LOG_BLOCK_SIZE; + + no = log_block_get_hdr_no(log_block1); + ut_a(no == log_block_get_trl_no(log_block1)); + + if ((no == log_block_get_hdr_no(log_block2)) + && (no == log_block_get_trl_no(log_block2))) { + + /* Match found if the block is not corrupted */ + + data_len = log_block_get_data_len(log_block2); + + if (0 == ut_memcmp(log_block1 + LOG_BLOCK_DATA, + log_block2 + LOG_BLOCK_DATA, + data_len - LOG_BLOCK_DATA)) { + + /* Match found */ + + return(ut_dulint_add(start_lsn, + log_block2 - buf + data_len)); + } + } + + if (log_block1 == recv_buf) { + + /* No match found */ + + break; + } + } +no_match: + if (ut_dulint_cmp(start_lsn, LOG_START_LSN) == 0) { + + return(LOG_START_LSN); + } + + return(ut_dulint_zero); +} + +/************************************************************ +Copies a log segment from the most up-to-date log group to the other log +group, so that it contains the latest log data. */ +static +void +recv_copy_group( +/*============*/ + log_group_t* up_to_date_group, /* in: the most up-to-date + log group */ + log_group_t* group, /* in: copy to this log group */ + dulint_lsn recovered_lsn) /* in: recovery succeeded up + to this lsn */ +{ + dulint start_lsn; + dulint end_lsn; + dulint match; + byte* buf; + byte* buf1; + + ut_ad(mutex_own(&(log_sys->mutex))); + + if (0 == ut_dulint_cmp(LOG_START_LSN, recovered_lsn)) { + + return; + } + + ut_ad(RECV_BACK_SCAN_SIZE <= log_sys->buf_size); + + buf1 = mem_alloc(2 * RECV_BACK_SCAN_SIZE); + buf = ut_align(buf, RECV_BACK_SCAN_SIZE);); + + end_lsn = ut_dulint_align_up(recovered_lsn, RECV_BACK_SCAN_SIZE); + + match = ut_dulint_zero; + + for (;;) { + if (ut_dulint_cmp(ut_dulint_add(LOG_START_LSN, + RECV_BACK_SCAN_SIZE), end_lsn) >= 0) { + start_lsn = LOG_START_LSN; + } else { + start_lsn = ut_dulint_subtract(end_lsn, + RECV_BACK_SCAN_SIZE); + } + + log_group_read_log_seg(LOG_RECOVER, buf, group, start_lsn, + end_lsn); + log_group_read_log_seg(LOG_RECOVER, log_sys->buf, + up_to_date_group, start_lsn, end_lsn); + + match = recv_log_bufs_cmp(log_sys->buf, buf, start_lsn, + end_lsn, recovered_lsn); + + if (ut_dulint_cmp(match, recovered_lsn) != 0) { + recv_log_buf_flush(group, start_lsn, end_lsn); + } + + if (!ut_dulint_zero(match)) { + + mem_free(buf1); + + return; + } + + end_lsn = start_lsn; + } +} + +/************************************************************ +Copies a log segment from the most up-to-date log group to the other log +groups, so that they all contain the latest log data. Also writes the info +about the latest checkpoint to the groups, and inits the fields in the group +memory structs to up-to-date values. */ + +void +recv_synchronize_groups( +/*====================*/ + log_group_t* up_to_date_group, /* in: the most up-to-date + log group */ + dulint_lsn recovered_lsn, /* in: recovery succeeded up + to this lsn */ + log_group_t* max_checkpoint_group) /* in: the group with the most + recent checkpoint info */ +{ + log_group_t* group; + + ut_ad(mutex_own(&(log_sys->mutex))); + + group = UT_LIST_GET_FIRST(log_sys->log_groups); + + while (group) { + if (group != up_to_date_group) { + + /* Copy log data */ + + recv_copy_group(group, up_to_date_group, + recovered_lsn); + } + + if (group != max_checkpoint_group) { + + /* Copy the checkpoint info to the group */ + + log_group_checkpoint(group); + + mutex_exit(&(log_sys->mutex)); + + /* Wait for the checkpoint write to complete */ + rw_lock_s_lock(&(log_sys->checkpoint_lock)); + rw_lock_s_unlock(&(log_sys->checkpoint_lock)); + + mutex_enter(&(log_sys->mutex)); + } + + /* Update the fields in the group struct to correspond to + recovered_lsn */ + + log_group_set_fields(group, recovered_lsn); + + group = UT_LIST_GET_NEXT(log_groups, group); + } +} + +/************************************************************ +Looks for the maximum consistent checkpoint from the log groups. */ +static +ulint +recv_find_max_checkpoint( +/*=====================*/ + /* out: error code or DB_SUCCESS */ + log_group_t** max_group, /* out: max group */ + ulint* max_field) /* out: LOG_CHECKPOINT_1 or + LOG_CHECKPOINT_2 */ +{ + log_group_t* group; + dulint max_no; + dulint cp_no; + ulint field; + ulint fold; + byte* buf; + + ut_ad(mutex_own(&(log_sys->mutex))); + + /* Look for the latest checkpoint from the log groups */ + group = UT_LIST_GET_FIRST(log_sys->log_groups); + + checkpoint_no = ut_dulint_zero; + checkpoint_lsn = ut_dulint_zero; + *max_group = NULL; + + buf = log_sys->checkpoint_buf; + + while (group) { + group->state = LOG_GROUP_CORRUPTED; + + for (field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2; + field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) { + + log_group_read_checkpoint_info(group, field); + + /* Check the consistency of the checkpoint info */ + fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1); + + if (fold != mach_read_from_4(buf + + LOG_CHECKPOINT_CHECKSUM_1)) { + goto not_consistent; + } + + fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN, + LOG_CHECKPOINT_CHECKSUM_2 + - LOG_CHECKPOINT_LSN); + if (fold != mach_read_from_4(buf + + LOG_CHECKPOINT_CHECKSUM_2)) { + goto not_consistent; + } + + group->state = LOG_GROUP_OK; + + group->lsn = mach_read_from_8(buf + + LOG_CHECKPOINT_LSN); + group->lsn_offset = mach_read_from_4(buf + + LOG_CHECKPOINT_OFFSET); + group->lsn_file_count = mach_read_from_4( + buf + LOG_CHECKPOINT_FILE_COUNT); + + cp_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO); + + if (ut_dulint_cmp(cp_no, max_no) >= 0) { + *max_group = group; + *max_field = field; + max_no = cp_no; + } + + not_consistent: + } + + group = UT_LIST_GET_NEXT(log_groups, group); + } + + if (*max_group == NULL) { + + return(DB_ERROR); + } + + return(DB_SUCCESS); +} + +/*********************************************************** +Parses log records from a buffer and stores them to a hash table to wait +merging to file pages. If the hash table becomes too big, merges automatically +it to file pages. */ +static +bool +recv_parse_and_hash_log_recs( +/*=========================*/ + /* out: TRUE if limit_lsn has been reached */ + byte* buf, /* in: buffer containing a log segment or + garbage */ + ulint len, /* in: buffer length */ + dulint start_lsn, /* in: buffer start lsn */ + dulint limit_lsn, /* in: recover at least to this lsn */ + dulint* recovered_lsn) /* out: was able to parse up to this lsn */ +{ + +} + +/************************************************************ +Recovers from a checkpoint. When this function returns, the database is able +to start processing new user transactions, but the function +recv_recovery_from_checkpoint_finish should be called later to complete +the recovery and free the resources used in it. */ + +ulint +recv_recovery_from_checkpoint_start( +/*================================*/ + /* out: error code or DB_SUCCESS */ + dulint limit_lsn) /* in: recover up to this lsn if possible */ +{ + log_group_t* max_cp_group; + log_group_t* up_to_date_group; + ulint max_cp_field; + byte* buf; + ulint err; + dulint checkpoint_lsn; + dulint checkpoint_no; + dulint recovered_lsn; + dulint old_lsn; + dulint end_lsn; + dulint start_lsn; + bool finished; + dulint flush_start_lsn; + + mutex_enter(&(log_sys->mutex)); + + /* Look for the latest checkpoint from any of the log groups */ + + err = recv_find_max_checkpoint(&max_cp_group, &max_cp_field); + + if (err != DB_SUCCESS) { + mutex_exit(&(log_sys->mutex)); + + return(err); + } + + log_group_read_checkpoint_info(max_cp_group, max_cp_field); + + buf = log_sys->checkpoint_buf; + + checkpoint_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_LSN); + checkpoint_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO); + + if (ut_dulint_cmp(limit_lsn, checkpoint_lsn) < 0) { + mutex_exit(&(log_sys->mutex)); + + return(DB_ERROR); + } + + /* Start reading the log groups from the checkpoint lsn up. The + variable flush_start_lsn tells a lsn up to which the log is known + to be contiguously written in all log groups. */ + + recovered_lsn = checkpoint_lsn; + flush_start_lsn = ut_dulint_align_down(checkpoint_lsn, + OS_FILE_LOG_BLOCK_SIZE); + up_to_date_group = max_cp_group; + + ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size); + + group = UT_LIST_GET_FIRST(log_sys->log_groups); + + while (group) { + finished = FALSE; + + if (group->state == LOG_GROUP_CORRUPTED) { + finished = TRUE; + } + + start_lsn = flush_start_lsn; + + while (!finished) { + end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE); + + log_group_read_log_seg(LOG_RECOVER, log_sys->buf, + group, start_lsn, end_lsn); + old_lsn = recovered_lsn; + + finished = recv_parse_and_hash_log_recs(log_sys->buf, + RECV_SCAN_SIZE, start_lsn, + limit_lsn, &flush_start_lsn, + &recovered_lsn); + + if (ut_dulint_cmp(recovered_lsn, old_lsn) > 0) { + + /* We found a more up-to-date group */ + up_to_date_group = group; + } + + start_lsn = end_lsn; + } + + group = UT_LIST_GET_NEXT(log_groups, group); + } + + /* Delete possible corrupted or extra log records from all log + groups */ + + recv_truncate_groups(recovered_lsn); + + /* Synchronize the uncorrupted log groups to the most up-to-date log + group; we may also have to copy checkpoint info to groups */ + + log_sys->next_checkpoint_lsn = checkpoint_lsn; + log_sys->next_checkpoint_no = checkpoint_no; + + recv_synchronize_groups(up_to_date_group, _lsn, max_cp_group); + + log_sys->next_checkpoint_no = ut_dulint_add(checkpoint_no, 1); + + /* The database is now ready to start almost normal processing of user + transactions */ + + return(DB_SUCCESS); +} + +/************************************************************ +Completes recovery from a checkpoint. */ + +void +recv_recovery_from_checkpoint_finish(void) +/*======================================*/ +{ + /* Rollback the uncommitted transactions which have no user session */ + + trx_rollback_all_without_sess(); + + /* Merge the hashed log records */ + + recv_merge_hashed_log_recs(); + + /* Free the resources of the recovery system */ + + recv_sys_empty(); +} + +/**************************************************************** +Writes to the log a record about incrementing the row id counter. */ +UNIV_INLINE +void +log_write_row_id_incr_rec(void) +/*===========================*/ +{ + log_t* log = log_sys; + ulint data_len; + + mutex_enter(&(log->mutex)); + + data_len = (log->buf_free % OS_FILE_LOG_BLOCK_SIZE) + 1; + + if (data_len >= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) { + + /* The string does not fit within the current log block + or the the block would become full */ + + mutex_exit(&(log->mutex)); + + log_write_row_id_incr_rec_slow(); + + return; + } + + *(log->buf + log->buf_free) = MLOG_INCR_ROW_ID | MLOG_SINGLE_REC_FLAG; + + log_block_set_data_len(ut_align_down(log->buf + log->buf_free, + OS_FILE_LOG_BLOCK_SIZE), + data_len); +#ifdef UNIV_LOG_DEBUG + log->old_buf_free = log->buf_free; + log->old_lsn = log->lsn; + log_check_log_recs(log->buf + log->buf_free, 1, log->lsn); +#endif + log->buf_free++; + + ut_ad(log->buf_free <= log->buf_size); + + UT_DULINT_INC(log->lsn); + + mutex_exit(&(log->mutex)); +} + +/**************************************************************** +Writes to the log a record about incrementing the row id counter. */ +static +void +log_write_row_id_incr_rec_slow(void) +/*================================*/ +{ + byte type; + + log_reserve_and_open(1); + + type = MLOG_INCR_ROW_ID | MLOG_SINGLE_REC_FLAG; + + log_write_low(&type, 1); + + log_close(); + + log_release(); +} + +/************************************************************************** +Parses and applies a log record MLOG_SET_ROW_ID. */ + +byte* +dict_hdr_parse_set_row_id( +/*======================*/ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + page_t* page) /* in: page or NULL */ +{ + dulint dval; + + ptr = mach_dulint_parse_compressed(ptr, end_ptr, &dval); + + if (ptr == NULL) { + + return(NULL); + } + + if (!page) { + + return(ptr); + } + + mach_write_to_8(page + DICT_HDR + DICT_HDR_ROW_ID, dval); + + return(ptr); +} + |