diff options
Diffstat (limited to 'innobase/log/log0log.c')
-rw-r--r-- | innobase/log/log0log.c | 2781 |
1 files changed, 2781 insertions, 0 deletions
diff --git a/innobase/log/log0log.c b/innobase/log/log0log.c new file mode 100644 index 00000000000..c6fec44d128 --- /dev/null +++ b/innobase/log/log0log.c @@ -0,0 +1,2781 @@ +/****************************************************** +Database log + +(c) 1995-1997 Innobase Oy + +Created 12/9/1995 Heikki Tuuri +*******************************************************/ + +#include "log0log.h" + +#ifdef UNIV_NONINL +#include "log0log.ic" +#endif + +#include "mem0mem.h" +#include "buf0buf.h" +#include "buf0flu.h" +#include "srv0srv.h" +#include "log0recv.h" +#include "fil0fil.h" +#include "dict0boot.h" +#include "srv0srv.h" +#include "trx0sys.h" +#include "trx0trx.h" + +/* Global log system variable */ +log_t* log_sys = NULL; + +ibool log_do_write = TRUE; +ibool log_debug_writes = FALSE; + +/* Pointer to this variable is used as the i/o-message when we do i/o to an +archive */ +byte log_archive_io; + +/* A margin for free space in the log buffer before a log entry is catenated */ +#define LOG_BUF_WRITE_MARGIN (4 * OS_FILE_LOG_BLOCK_SIZE) + +/* Margins for free space in the log buffer after a log entry is catenated */ +#define LOG_BUF_FLUSH_RATIO 2 +#define LOG_BUF_FLUSH_MARGIN (LOG_BUF_WRITE_MARGIN + 4 * UNIV_PAGE_SIZE) + +/* Margin for the free space in the smallest log group, before a new query +step which modifies the database, is started */ + +#define LOG_CHECKPOINT_FREE_PER_THREAD (4 * UNIV_PAGE_SIZE) +#define LOG_CHECKPOINT_EXTRA_FREE (8 * UNIV_PAGE_SIZE) + +/* This parameter controls asynchronous making of a new checkpoint; the value +should be bigger than LOG_POOL_PREFLUSH_RATIO_SYNC */ + +#define LOG_POOL_CHECKPOINT_RATIO_ASYNC 32 + +/* This parameter controls synchronous preflushing of modified buffer pages */ +#define LOG_POOL_PREFLUSH_RATIO_SYNC 16 + +/* The same ratio for asynchronous preflushing; this value should be less than +the previous */ +#define LOG_POOL_PREFLUSH_RATIO_ASYNC 8 + +/* Extra margin, in addition to one log file, used in archiving */ +#define LOG_ARCHIVE_EXTRA_MARGIN (4 * UNIV_PAGE_SIZE) + +/* This parameter controls asynchronous writing to the archive */ +#define LOG_ARCHIVE_RATIO_ASYNC 16 + +/* Codes used in unlocking flush latches */ +#define LOG_UNLOCK_NONE_FLUSHED_LOCK 1 +#define LOG_UNLOCK_FLUSH_LOCK 2 + +/* States of an archiving operation */ +#define LOG_ARCHIVE_READ 1 +#define LOG_ARCHIVE_WRITE 2 + +/********************************************************** +Calculates the file count of an lsn within a log group. */ +static +ulint +log_group_calc_lsn_file_count( +/*==========================*/ + /* out: file count within the log group */ + dulint lsn, /* in: lsn, must be within 4 GB of + group->next_block_lsn */ + log_group_t* group); /* in: log group */ +/********************************************************** +Completes a checkpoint write i/o to a log file. */ +static +void +log_io_complete_checkpoint( +/*=======================*/ + log_group_t* group); /* in: log group */ +/********************************************************** +Completes an archiving i/o. */ +static +void +log_io_complete_archive(void); +/*=========================*/ +/******************************************************************** +Tries to establish a big enough margin of free space in the log groups, such +that a new log entry can be catenated without an immediate need for a +archiving. */ +static +void +log_archive_margin(void); +/*====================*/ + + +/******************************************************************** +Returns the oldest modified block lsn in the pool, or log_sys->lsn if none +exists. */ +static +dulint +log_buf_pool_get_oldest_modification(void) +/*======================================*/ +{ + dulint lsn; + + ut_ad(mutex_own(&(log_sys->mutex))); + + lsn = buf_pool_get_oldest_modification(); + + if (ut_dulint_is_zero(lsn)) { + + lsn = log_sys->lsn; + } + + return(lsn); +} + +/**************************************************************** +Opens the log for log_write_low. The log must be closed with log_close and +released with log_release. */ + +dulint +log_reserve_and_open( +/*=================*/ + /* out: start lsn of the log record */ + ulint len) /* in: length of data to be catenated */ +{ + log_t* log = log_sys; + ulint len_upper_limit; + ulint archived_lsn_age; + ulint count = 0; + ulint dummy; +loop: + mutex_enter(&(log->mutex)); + + /* Calculate an upper limit for the space the string may take in the + log buffer */ + + len_upper_limit = LOG_BUF_WRITE_MARGIN + (5 * len) / 4; + + if (log->buf_free + len_upper_limit > log->buf_size) { + + mutex_exit(&(log->mutex)); + + /* Not enough free space, do a syncronous flush of the log + buffer */ + log_flush_up_to(ut_dulint_max, LOG_WAIT_ALL_GROUPS); + + count++; + + ut_ad(count < 50); + + goto loop; + } + + if (log->archiving_state != LOG_ARCH_OFF) { + + archived_lsn_age = ut_dulint_minus(log->lsn, log->archived_lsn); + + if (archived_lsn_age + len_upper_limit + > log->max_archived_lsn_age) { + + /* Not enough free archived space in log groups: do a + synchronous archive write batch: */ + + mutex_exit(&(log->mutex)); + + ut_ad(len_upper_limit <= log->max_archived_lsn_age); + + log_archive_do(TRUE, &dummy); + + count++; + + ut_ad(count < 50); + + goto loop; + } + } + +#ifdef UNIV_LOG_DEBUG + log->old_buf_free = log->buf_free; + log->old_lsn = log->lsn; +#endif + return(log->lsn); +} + +/**************************************************************** +Writes to the log the string given. It is assumed that the caller holds the +log mutex. */ + +void +log_write_low( +/*==========*/ + byte* str, /* in: string */ + ulint str_len) /* in: string length */ +{ + log_t* log = log_sys; + ulint len; + ulint data_len; + byte* log_block; + + ut_ad(mutex_own(&(log->mutex))); +part_loop: + /* Calculate a part length */ + + data_len = (log->buf_free % OS_FILE_LOG_BLOCK_SIZE) + str_len; + + if (data_len <= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) { + + /* The string fits within the current log block */ + + len = str_len; + } else { + data_len = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE; + + len = OS_FILE_LOG_BLOCK_SIZE + - (log->buf_free % OS_FILE_LOG_BLOCK_SIZE) + - LOG_BLOCK_TRL_SIZE; + } + + ut_memcpy(log->buf + log->buf_free, str, len); + + str_len -= len; + str = str + len; + + log_block = ut_align_down(log->buf + log->buf_free, + OS_FILE_LOG_BLOCK_SIZE); + log_block_set_data_len(log_block, data_len); + + if (data_len == OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) { + /* This block became full */ + log_block_set_data_len(log_block, OS_FILE_LOG_BLOCK_SIZE); + log_block_set_checkpoint_no(log_block, + log_sys->next_checkpoint_no); + len += LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE; + + log->lsn = ut_dulint_add(log->lsn, len); + + /* Initialize the next block header and trailer */ + log_block_init(log_block + OS_FILE_LOG_BLOCK_SIZE, log->lsn); + } else { + log->lsn = ut_dulint_add(log->lsn, len); + } + + log->buf_free += len; + + ut_ad(log->buf_free <= log->buf_size); + + if (str_len > 0) { + goto part_loop; + } +} + +/**************************************************************** +Closes the log. */ + +dulint +log_close(void) +/*===========*/ + /* out: lsn */ +{ + byte* log_block; + ulint first_rec_group; + dulint oldest_lsn; + dulint lsn; + log_t* log = log_sys; + + ut_ad(mutex_own(&(log->mutex))); + + lsn = log->lsn; + + log_block = ut_align_down(log->buf + log->buf_free, + OS_FILE_LOG_BLOCK_SIZE); + first_rec_group = log_block_get_first_rec_group(log_block); + + if (first_rec_group == 0) { + /* We initialized a new log block which was not written + full by the current mtr: the next mtr log record group + will start within this block at the offset data_len */ + + log_block_set_first_rec_group(log_block, + log_block_get_data_len(log_block)); + } + + if (log->buf_free > log->max_buf_free) { + + log->check_flush_or_checkpoint = TRUE; + } + + if (ut_dulint_minus(lsn, log->last_checkpoint_lsn) + <= log->max_modified_age_async) { + goto function_exit; + } + + oldest_lsn = buf_pool_get_oldest_modification(); + + if (ut_dulint_is_zero(oldest_lsn) + || (ut_dulint_minus(lsn, oldest_lsn) + > log->max_modified_age_async) + || (ut_dulint_minus(lsn, log->last_checkpoint_lsn) + > log->max_checkpoint_age_async)) { + + log->check_flush_or_checkpoint = TRUE; + } +function_exit: + +#ifdef UNIV_LOG_DEBUG + log_check_log_recs(log->buf + log->old_buf_free, + log->buf_free - log->old_buf_free, log->old_lsn); +#endif + + return(lsn); +} + +/********************************************************** +Pads the current log block full with dummy log records. Used in producing +consistent archived log files. */ +static +void +log_pad_current_log_block(void) +/*===========================*/ +{ + byte b = MLOG_DUMMY_RECORD; + ulint pad_length; + ulint i; + dulint lsn; + + log_reserve_and_open(OS_FILE_LOG_BLOCK_SIZE); + + pad_length = OS_FILE_LOG_BLOCK_SIZE + - (log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE) + - LOG_BLOCK_TRL_SIZE; + + for (i = 0; i < pad_length; i++) { + log_write_low(&b, 1); + } + + lsn = log_sys->lsn; + + log_close(); + log_release(); + + ut_a((ut_dulint_get_low(lsn) % OS_FILE_LOG_BLOCK_SIZE) + == LOG_BLOCK_HDR_SIZE); +} + +/********************************************************** +Calculates the data capacity of a log group, when the log file headers are not +included. */ + +ulint +log_group_get_capacity( +/*===================*/ + /* out: capacity in bytes */ + log_group_t* group) /* in: log group */ +{ + ut_ad(mutex_own(&(log_sys->mutex))); + + return((group->file_size - LOG_FILE_HDR_SIZE) * group->n_files); +} + +/********************************************************** +Calculates the offset within a log group, when the log file headers are not +included. */ +UNIV_INLINE +ulint +log_group_calc_size_offset( +/*=======================*/ + /* out: size offset (<= offset) */ + ulint offset, /* in: real offset within the log group */ + log_group_t* group) /* in: log group */ +{ + ut_ad(mutex_own(&(log_sys->mutex))); + + return(offset - LOG_FILE_HDR_SIZE * (1 + offset / group->file_size)); +} + +/********************************************************** +Calculates the offset within a log group, when the log file headers are +included. */ +UNIV_INLINE +ulint +log_group_calc_real_offset( +/*=======================*/ + /* out: real offset (>= offset) */ + ulint offset, /* in: size offset within the log group */ + log_group_t* group) /* in: log group */ +{ + ut_ad(mutex_own(&(log_sys->mutex))); + + return(offset + LOG_FILE_HDR_SIZE + * (1 + offset / (group->file_size - LOG_FILE_HDR_SIZE))); +} + +/********************************************************** +Calculates the offset of an lsn within a log group. */ +static +ulint +log_group_calc_lsn_offset( +/*======================*/ + /* out: offset within the log group */ + dulint lsn, /* in: lsn, must be within 4 GB of group->lsn */ + log_group_t* group) /* in: log group */ +{ + dulint gr_lsn; + ulint gr_lsn_size_offset; + ulint difference; + ulint group_size; + ulint offset; + + ut_ad(mutex_own(&(log_sys->mutex))); + + gr_lsn = group->lsn; + + gr_lsn_size_offset = log_group_calc_size_offset(group->lsn_offset, + group); + group_size = log_group_get_capacity(group); + + if (ut_dulint_cmp(lsn, gr_lsn) >= 0) { + + difference = ut_dulint_minus(lsn, gr_lsn); + } else { + difference = ut_dulint_minus(gr_lsn, lsn); + + difference = difference % group_size; + + difference = group_size - difference; + } + + offset = (gr_lsn_size_offset + difference) % group_size; + + return(log_group_calc_real_offset(offset, group)); +} + +/************************************************************ +Sets the field values in group to correspond to a given lsn. For this function +to work, the values must already be correctly initialized to correspond to +some lsn, for instance, a checkpoint lsn. */ + +void +log_group_set_fields( +/*=================*/ + log_group_t* group, /* in: group */ + dulint lsn) /* in: lsn for which the values should be + set */ +{ + group->lsn_offset = log_group_calc_lsn_offset(lsn, group); + group->lsn = lsn; +} + +/********************************************************************* +Calculates the recommended highest values for lsn - last_checkpoint_lsn, +lsn - buf_get_oldest_modification(), and lsn - max_archive_lsn_age. */ +static +ibool +log_calc_max_ages(void) +/*===================*/ + /* out: error value FALSE if the smallest log group is + too small to accommodate the number of OS threads in + the database server */ +{ + log_group_t* group; + ulint n_threads; + ulint margin; + ulint free; + ibool success = TRUE; + ulint smallest_capacity; + ulint archive_margin; + ulint smallest_archive_margin; + + ut_ad(!mutex_own(&(log_sys->mutex))); + + n_threads = srv_get_n_threads(); + + mutex_enter(&(log_sys->mutex)); + + group = UT_LIST_GET_FIRST(log_sys->log_groups); + + ut_ad(group); + + smallest_capacity = ULINT_MAX; + smallest_archive_margin = ULINT_MAX; + + while (group) { + if (log_group_get_capacity(group) < smallest_capacity) { + + smallest_capacity = log_group_get_capacity(group); + } + + archive_margin = log_group_get_capacity(group) + - (group->file_size - LOG_FILE_HDR_SIZE) + - LOG_ARCHIVE_EXTRA_MARGIN; + + if (archive_margin < smallest_archive_margin) { + + smallest_archive_margin = archive_margin; + } + + group = UT_LIST_GET_NEXT(log_groups, group); + } + + /* For each OS thread we must reserve so much free space in the + smallest log group that it can accommodate the log entries produced + by single query steps: running out of free log space is a serious + system error which requires rebooting the database. */ + + free = LOG_CHECKPOINT_FREE_PER_THREAD * n_threads + + LOG_CHECKPOINT_EXTRA_FREE; + if (free >= smallest_capacity / 2) { + success = FALSE; + } else { + margin = smallest_capacity - free; + } + + margin = ut_min(margin, log_sys->adm_checkpoint_interval); + + log_sys->max_modified_age_async = margin + - margin / LOG_POOL_PREFLUSH_RATIO_ASYNC; + log_sys->max_modified_age_sync = margin + - margin / LOG_POOL_PREFLUSH_RATIO_SYNC; + + log_sys->max_checkpoint_age_async = margin - margin + / LOG_POOL_CHECKPOINT_RATIO_ASYNC; + log_sys->max_checkpoint_age = margin; + + log_sys->max_archived_lsn_age = smallest_archive_margin; + + log_sys->max_archived_lsn_age_async = smallest_archive_margin + - smallest_archive_margin / + LOG_ARCHIVE_RATIO_ASYNC; + mutex_exit(&(log_sys->mutex)); + + if (!success) { + printf( + "Error: log file group too small for the number of threads\n"); + } + + return(success); +} + +/********************************************************** +Initializes the log. */ + +void +log_init(void) +/*==========*/ +{ + byte* buf; + + log_sys = mem_alloc(sizeof(log_t)); + + mutex_create(&(log_sys->mutex)); + mutex_set_level(&(log_sys->mutex), SYNC_LOG); + + mutex_enter(&(log_sys->mutex)); + + /* Start the lsn from one log block from zero: this way every + log record has a start lsn != zero, a fact which we will use */ + + log_sys->lsn = LOG_START_LSN; + + ut_a(LOG_BUFFER_SIZE >= 16 * OS_FILE_LOG_BLOCK_SIZE); + ut_a(LOG_BUFFER_SIZE >= 4 * UNIV_PAGE_SIZE); + + buf = ut_malloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE); + log_sys->buf = ut_align(buf, OS_FILE_LOG_BLOCK_SIZE); + + log_sys->buf_size = LOG_BUFFER_SIZE; + log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO + - LOG_BUF_FLUSH_MARGIN; + log_sys->check_flush_or_checkpoint = TRUE; + UT_LIST_INIT(log_sys->log_groups); + + log_sys->n_log_ios = 0; + + /*----------------------------*/ + + log_sys->buf_next_to_write = 0; + + log_sys->written_to_some_lsn = log_sys->lsn; + log_sys->written_to_all_lsn = log_sys->lsn; + + log_sys->n_pending_writes = 0; + + log_sys->no_flush_event = os_event_create(NULL); + + os_event_set(log_sys->no_flush_event); + + log_sys->one_flushed_event = os_event_create(NULL); + + os_event_set(log_sys->one_flushed_event); + + /*----------------------------*/ + log_sys->adm_checkpoint_interval = ULINT_MAX; + + log_sys->next_checkpoint_no = ut_dulint_zero; + log_sys->last_checkpoint_lsn = log_sys->lsn; + log_sys->n_pending_checkpoint_writes = 0; + + rw_lock_create(&(log_sys->checkpoint_lock)); + rw_lock_set_level(&(log_sys->checkpoint_lock), SYNC_NO_ORDER_CHECK); + + log_sys->checkpoint_buf = ut_align( + mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE), + OS_FILE_LOG_BLOCK_SIZE); + /*----------------------------*/ + + log_sys->archiving_state = LOG_ARCH_ON; + log_sys->archived_lsn = log_sys->lsn; + + log_sys->n_pending_archive_ios = 0; + + rw_lock_create(&(log_sys->archive_lock)); + rw_lock_set_level(&(log_sys->archive_lock), SYNC_NO_ORDER_CHECK); + + log_sys->archive_buf = ut_align( + ut_malloc(LOG_ARCHIVE_BUF_SIZE + + OS_FILE_LOG_BLOCK_SIZE), + OS_FILE_LOG_BLOCK_SIZE); + log_sys->archive_buf_size = LOG_ARCHIVE_BUF_SIZE; + + log_sys->archiving_on = os_event_create(NULL); + + /*----------------------------*/ + + log_sys->online_backup_state = FALSE; + + /*----------------------------*/ + + log_block_init(log_sys->buf, log_sys->lsn); + log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE); + + log_sys->buf_free = LOG_BLOCK_HDR_SIZE; + log_sys->lsn = ut_dulint_add(LOG_START_LSN, LOG_BLOCK_HDR_SIZE); + + mutex_exit(&(log_sys->mutex)); + +#ifdef UNIV_LOG_DEBUG + recv_sys_create(); + recv_sys_init(); + + recv_sys->parse_start_lsn = log_sys->lsn; + recv_sys->scanned_lsn = log_sys->lsn; + recv_sys->scanned_checkpoint_no = 0; + recv_sys->recovered_lsn = log_sys->lsn; + recv_sys->limit_lsn = ut_dulint_max; +#endif +} + +/********************************************************************** +Inits a log group to the log system. */ + +void +log_group_init( +/*===========*/ + ulint id, /* in: group id */ + ulint n_files, /* in: number of log files */ + ulint file_size, /* in: log file size in bytes */ + ulint space_id, /* in: space id of the file space + which contains the log files of this + group */ + ulint archive_space_id) /* in: space id of the file space + which contains some archived log + files for this group; currently, only + for the first log group this is + used */ +{ + ulint i; + + log_group_t* group; + + group = mem_alloc(sizeof(log_group_t)); + + group->id = id; + group->n_files = n_files; + group->file_size = file_size; + group->space_id = space_id; + group->state = LOG_GROUP_OK; + group->lsn = LOG_START_LSN; + group->lsn_offset = LOG_FILE_HDR_SIZE; + group->n_pending_writes = 0; + + group->file_header_bufs = mem_alloc(sizeof(byte*) * n_files); + group->archive_file_header_bufs = mem_alloc(sizeof(byte*) * n_files); + + for (i = 0; i < n_files; i++) { + *(group->file_header_bufs + i) = ut_align( + mem_alloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE), + OS_FILE_LOG_BLOCK_SIZE); + *(group->archive_file_header_bufs + i) = ut_align( + mem_alloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE), + OS_FILE_LOG_BLOCK_SIZE); + } + + group->archive_space_id = archive_space_id; + + group->archived_file_no = 0; + group->archived_offset = 0; + + group->checkpoint_buf = ut_align( + mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE), + OS_FILE_LOG_BLOCK_SIZE); + + UT_LIST_ADD_LAST(log_groups, log_sys->log_groups, group); + + ut_a(log_calc_max_ages()); +} + +/********************************************************************** +Does the unlockings needed in flush i/o completion. */ +UNIV_INLINE +void +log_flush_do_unlocks( +/*=================*/ + ulint code) /* in: any ORed combination of LOG_UNLOCK_FLUSH_LOCK + and LOG_UNLOCK_NONE_FLUSHED_LOCK */ +{ + ut_ad(mutex_own(&(log_sys->mutex))); + + /* NOTE that we must own the log mutex when doing the setting of the + events: this is because transactions will wait for these events to + be set, and at that moment the log flush they were waiting for must + have ended. If the log mutex were not reserved here, the i/o-thread + calling this function might be preempted for a while, and when it + resumed execution, it might be that a new flush had been started, and + this function would erroneously signal the NEW flush as completed. + Thus, the changes in the state of these events are performed + atomically in conjunction with the changes in the state of + log_sys->n_pending_writes etc. */ + + if (code & LOG_UNLOCK_NONE_FLUSHED_LOCK) { + os_event_set(log_sys->one_flushed_event); + } + + if (code & LOG_UNLOCK_FLUSH_LOCK) { + os_event_set(log_sys->no_flush_event); + } +} + +/********************************************************************** +Checks if a flush is completed for a log group and does the completion +routine if yes. */ +UNIV_INLINE +ulint +log_group_check_flush_completion( +/*=============================*/ + /* out: LOG_UNLOCK_NONE_FLUSHED_LOCK or 0 */ + log_group_t* group) /* in: log group */ +{ + ut_ad(mutex_own(&(log_sys->mutex))); + + if (!log_sys->one_flushed && (group->n_pending_writes == 0)) { + + if (log_debug_writes) { + printf("Log flushed first to group %lu\n", group->id); + } + + log_sys->written_to_some_lsn = log_sys->flush_lsn; + log_sys->one_flushed = TRUE; + + return(LOG_UNLOCK_NONE_FLUSHED_LOCK); + } + + if (log_debug_writes && (group->n_pending_writes == 0)) { + + printf("Log flushed to group %lu\n", group->id); + } + + return(0); +} + +/********************************************************** +Checks if a flush is completed and does the completion routine if yes. */ +static +ulint +log_sys_check_flush_completion(void) +/*================================*/ + /* out: LOG_UNLOCK_FLUSH_LOCK or 0 */ +{ + ulint move_start; + ulint move_end; + + ut_ad(mutex_own(&(log_sys->mutex))); + + if (log_sys->n_pending_writes == 0) { + + log_sys->written_to_all_lsn = log_sys->flush_lsn; + log_sys->buf_next_to_write = log_sys->flush_end_offset; + + if (log_sys->flush_end_offset > log_sys->max_buf_free / 2) { + /* Move the log buffer content to the start of the + buffer */ + + move_start = ut_calc_align_down( + log_sys->flush_end_offset, + OS_FILE_LOG_BLOCK_SIZE); + move_end = ut_calc_align(log_sys->buf_free, + OS_FILE_LOG_BLOCK_SIZE); + + ut_memmove(log_sys->buf, log_sys->buf + move_start, + move_end - move_start); + log_sys->buf_free -= move_start; + + log_sys->buf_next_to_write -= move_start; + } + + return(LOG_UNLOCK_FLUSH_LOCK); + } + + return(0); +} + +/********************************************************** +Completes an i/o to a log file. */ + +void +log_io_complete( +/*============*/ + log_group_t* group) /* in: log group or a dummy pointer */ +{ + ulint unlock; + + if ((byte*)group == &log_archive_io) { + /* It was an archive write */ + + log_io_complete_archive(); + + return; + } + + if ((ulint)group & 0x1) { + /* It was a checkpoint write */ + group = (log_group_t*)((ulint)group - 1); + + fil_flush(group->space_id); + + log_io_complete_checkpoint(group); + + return; + } + + fil_flush(group->space_id); + + mutex_enter(&(log_sys->mutex)); + + ut_ad(group->n_pending_writes > 0); + ut_ad(log_sys->n_pending_writes > 0); + + group->n_pending_writes--; + log_sys->n_pending_writes--; + + unlock = log_group_check_flush_completion(group); + unlock = unlock | log_sys_check_flush_completion(); + + log_flush_do_unlocks(unlock); + + mutex_exit(&(log_sys->mutex)); +} + +/********************************************************** +Writes a log file header to a log file space. */ +static +void +log_group_file_header_flush( +/*========================*/ + ulint type, /* in: LOG_FLUSH or LOG_RECOVER */ + log_group_t* group, /* in: log group */ + ulint nth_file, /* in: header to the nth file in the + log file space */ + dulint start_lsn) /* in: log file data starts at this + lsn */ +{ + byte* buf; + ulint dest_offset; + ibool sync; + + ut_ad(mutex_own(&(log_sys->mutex))); + + ut_a(nth_file < group->n_files); + + buf = *(group->file_header_bufs + nth_file); + + mach_write_to_4(buf + LOG_GROUP_ID, group->id); + mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn); + + dest_offset = nth_file * group->file_size; + + sync = FALSE; + + if (type == LOG_RECOVER) { + + sync = TRUE; + } + + if (log_debug_writes) { + printf( + "Writing log file header to group %lu file %lu\n", group->id, + nth_file); + } + + if (log_do_write) { + if (type == LOG_FLUSH) { + log_sys->n_pending_writes++; + group->n_pending_writes++; + } + + log_sys->n_log_ios++; + + fil_io(OS_FILE_WRITE | OS_FILE_LOG, sync, group->space_id, + dest_offset / UNIV_PAGE_SIZE, + dest_offset % UNIV_PAGE_SIZE, + OS_FILE_LOG_BLOCK_SIZE, + buf, group); + } +} + +/********************************************************** +Writes a buffer to a log file group. */ + +void +log_group_write_buf( +/*================*/ + ulint type, /* in: LOG_FLUSH or LOG_RECOVER */ + log_group_t* group, /* in: log group */ + byte* buf, /* in: buffer */ + ulint len, /* in: buffer len; must be divisible + by OS_FILE_LOG_BLOCK_SIZE */ + dulint start_lsn, /* in: start lsn of the buffer; must + be divisible by + OS_FILE_LOG_BLOCK_SIZE */ + ulint new_data_offset)/* in: start offset of new data in + buf: this parameter is used to decide + if we have to write a new log file + header */ +{ + ulint write_len; + ibool sync; + ibool write_header; + ulint next_offset; + + ut_ad(mutex_own(&(log_sys->mutex))); + ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0); + ut_ad(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0); + + sync = FALSE; + + if (type == LOG_RECOVER) { + + sync = TRUE; + } + + if (new_data_offset == 0) { + write_header = TRUE; + } else { + write_header = FALSE; + } +loop: + if (len == 0) { + + return; + } + + next_offset = log_group_calc_lsn_offset(start_lsn, group); + + if ((next_offset % group->file_size == LOG_FILE_HDR_SIZE) + && write_header) { + /* We start to write a new log file instance in the group */ + + log_group_file_header_flush(type, group, + next_offset / group->file_size, start_lsn); + } + + if ((next_offset % group->file_size) + len > group->file_size) { + + write_len = group->file_size - (next_offset % group->file_size); + } else { + write_len = len; + } + + if (log_debug_writes) { + printf( + "Writing log file segment to group %lu offset %lu len %lu\n", + group->id, next_offset, write_len); + } + + if (log_do_write) { + if (type == LOG_FLUSH) { + log_sys->n_pending_writes++; + group->n_pending_writes++; + } + + log_sys->n_log_ios++; + + fil_io(OS_FILE_WRITE | OS_FILE_LOG, sync, group->space_id, + next_offset / UNIV_PAGE_SIZE, + next_offset % UNIV_PAGE_SIZE, write_len, buf, group); + } + + if (write_len < len) { + start_lsn = ut_dulint_add(start_lsn, write_len); + len -= write_len; + buf += write_len; + + write_header = TRUE; + + goto loop; + } +} + +/********************************************************** +This function is called, e.g., when a transaction wants to commit. It checks +that the log has been flushed to disk up to the last log entry written by the +transaction. If there is a flush running, it waits and checks if the flush +flushed enough. If not, starts a new flush. */ + +void +log_flush_up_to( +/*============*/ + dulint lsn, /* in: log sequence number up to which the log should + be flushed, ut_dulint_max if not specified */ + ulint wait) /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP, + or LOG_WAIT_ALL_GROUPS */ +{ + log_group_t* group; + ulint start_offset; + ulint end_offset; + ulint area_start; + ulint area_end; + ulint loop_count; + + if (recv_no_ibuf_operations) { + /* Recovery is running and no operations on the log files are + allowed yet (the variable name .._no_ibuf_.. is misleading) */ + + return; + } + + loop_count = 0; +loop: + loop_count++; + + ut_ad(loop_count < 5); + + if (loop_count > 2) { +/* printf("Log loop count %lu\n", loop_count); */ + } + + mutex_enter(&(log_sys->mutex)); + + if ((ut_dulint_cmp(log_sys->written_to_all_lsn, lsn) >= 0) + || ((ut_dulint_cmp(log_sys->written_to_some_lsn, lsn) >= 0) + && (wait != LOG_WAIT_ALL_GROUPS))) { + + mutex_exit(&(log_sys->mutex)); + + return; + } + + if (log_sys->n_pending_writes > 0) { + /* A flush is running */ + + if (ut_dulint_cmp(log_sys->flush_lsn, lsn) >= 0) { + /* The flush will flush enough: wait for it to + complete */ + + goto do_waits; + } + + mutex_exit(&(log_sys->mutex)); + + /* Wait for the flush to complete and try to start a new + flush */ + + os_event_wait(log_sys->no_flush_event); + + goto loop; + } + + if (log_sys->buf_free == log_sys->buf_next_to_write) { + /* Nothing to flush */ + + mutex_exit(&(log_sys->mutex)); + + return; + } + + if (log_debug_writes) { + printf("Flushing log from %lu %lu up to lsn %lu %lu\n", + ut_dulint_get_high(log_sys->written_to_all_lsn), + ut_dulint_get_low(log_sys->written_to_all_lsn), + ut_dulint_get_high(log_sys->lsn), + ut_dulint_get_low(log_sys->lsn)); + } + + os_event_reset(log_sys->no_flush_event); + os_event_reset(log_sys->one_flushed_event); + + start_offset = log_sys->buf_next_to_write; + end_offset = log_sys->buf_free; + + area_start = ut_calc_align_down(start_offset, OS_FILE_LOG_BLOCK_SIZE); + area_end = ut_calc_align(end_offset, OS_FILE_LOG_BLOCK_SIZE); + + ut_ad(area_end - area_start > 0); + + log_sys->flush_lsn = log_sys->lsn; + log_sys->one_flushed = FALSE; + + log_block_set_flush_bit(log_sys->buf + area_start, TRUE); + log_block_set_checkpoint_no( + log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE, + log_sys->next_checkpoint_no); + + /* Copy the last, incompletely written, log block a log block length + up, so that when the flush operation writes from the log buffer, the + segment to write will not be changed by writers to the log */ + + ut_memcpy(log_sys->buf + area_end, + log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE, + OS_FILE_LOG_BLOCK_SIZE); + + log_sys->buf_free += OS_FILE_LOG_BLOCK_SIZE; + log_sys->flush_end_offset = log_sys->buf_free; + + group = UT_LIST_GET_FIRST(log_sys->log_groups); + + while (group) { + log_group_write_buf(LOG_FLUSH, group, + log_sys->buf + area_start, + area_end - area_start, + ut_dulint_align_down(log_sys->written_to_all_lsn, + OS_FILE_LOG_BLOCK_SIZE), + start_offset - area_start); + + log_group_set_fields(group, log_sys->flush_lsn); + + group = UT_LIST_GET_NEXT(log_groups, group); + } + +do_waits: + mutex_exit(&(log_sys->mutex)); + + if (wait == LOG_WAIT_ONE_GROUP) { + os_event_wait(log_sys->one_flushed_event); + } else if (wait == LOG_WAIT_ALL_GROUPS) { + os_event_wait(log_sys->no_flush_event); + } else { + ut_ad(wait == LOG_NO_WAIT); + } +} + +/******************************************************************** +Tries to establish a big enough margin of free space in the log buffer, such +that a new log entry can be catenated without an immediate need for a flush. */ +static +void +log_flush_margin(void) +/*==================*/ +{ + ibool do_flush = FALSE; + log_t* log = log_sys; + + mutex_enter(&(log->mutex)); + + if (log->buf_free > log->max_buf_free) { + + if (log->n_pending_writes > 0) { + /* A flush is running: hope that it will provide enough + free space */ + } else { + do_flush = TRUE; + } + } + + mutex_exit(&(log->mutex)); + + if (do_flush) { + log_flush_up_to(ut_dulint_max, LOG_NO_WAIT); + } +} + +/******************************************************************** +Advances the smallest lsn for which there are unflushed dirty blocks in the +buffer pool. NOTE: this function may only be called if the calling thread owns +no synchronization objects! */ + +ibool +log_preflush_pool_modified_pages( +/*=============================*/ + /* out: FALSE if there was a flush batch of + the same type running, which means that we + could not start this flush batch */ + dulint new_oldest, /* in: try to advance oldest_modified_lsn + at least to this lsn */ + ibool sync) /* in: TRUE if synchronous operation is + desired */ +{ + ulint n_pages; + + if (recv_recovery_on) { + /* If the recovery is running, we must first apply all + log records to their respective file pages to get the + right modify lsn values to these pages: otherwise, there + might be pages on disk which are not yet recovered to the + current lsn, and even after calling this function, we could + not know how up-to-date the disk version of the database is, + and we could not make a new checkpoint on the basis of the + info on the buffer pool only. */ + + recv_apply_hashed_log_recs(TRUE); + } + + n_pages = buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX, new_oldest); + + if (sync) { + buf_flush_wait_batch_end(BUF_FLUSH_LIST); + } + + if (n_pages == ULINT_UNDEFINED) { + + return(FALSE); + } + + return(TRUE); +} + +/********************************************************** +Completes a checkpoint. */ +static +void +log_complete_checkpoint(void) +/*=========================*/ +{ + ut_ad(mutex_own(&(log_sys->mutex))); + ut_ad(log_sys->n_pending_checkpoint_writes == 0); + + log_sys->next_checkpoint_no + = ut_dulint_add(log_sys->next_checkpoint_no, 1); + + log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn; + + rw_lock_x_unlock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT); +} + +/********************************************************** +Completes an asynchronous checkpoint info write i/o to a log file. */ +static +void +log_io_complete_checkpoint( +/*=======================*/ + log_group_t* group) /* in: log group */ +{ + mutex_enter(&(log_sys->mutex)); + + ut_ad(log_sys->n_pending_checkpoint_writes > 0); + + log_sys->n_pending_checkpoint_writes--; + + if (log_debug_writes) { + printf("Checkpoint info written to group %lu\n", group->id); + } + + if (log_sys->n_pending_checkpoint_writes == 0) { + log_complete_checkpoint(); + } + + mutex_exit(&(log_sys->mutex)); +} + +/*********************************************************************** +Writes info to a checkpoint about a log group. */ +static +void +log_checkpoint_set_nth_group_info( +/*==============================*/ + byte* buf, /* in: buffer for checkpoint info */ + ulint n, /* in: nth slot */ + ulint file_no,/* in: archived file number */ + ulint offset) /* in: archived file offset */ +{ + ut_ad(n < LOG_MAX_N_GROUPS); + + mach_write_to_4(buf + LOG_CHECKPOINT_GROUP_ARRAY + + 8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO, file_no); + mach_write_to_4(buf + LOG_CHECKPOINT_GROUP_ARRAY + + 8 * n + LOG_CHECKPOINT_ARCHIVED_OFFSET, offset); +} + +/*********************************************************************** +Gets info from a checkpoint about a log group. */ + +void +log_checkpoint_get_nth_group_info( +/*==============================*/ + byte* buf, /* in: buffer containing checkpoint info */ + ulint n, /* in: nth slot */ + ulint* file_no,/* out: archived file number */ + ulint* offset) /* out: archived file offset */ +{ + ut_ad(n < LOG_MAX_N_GROUPS); + + *file_no = mach_read_from_4(buf + LOG_CHECKPOINT_GROUP_ARRAY + + 8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO); + *offset = mach_read_from_4(buf + LOG_CHECKPOINT_GROUP_ARRAY + + 8 * n + LOG_CHECKPOINT_ARCHIVED_OFFSET); +} + +/********************************************************** +Writes the checkpoint info to a log group header. */ +static +void +log_group_checkpoint( +/*=================*/ + log_group_t* group) /* in: log group */ +{ + log_group_t* group2; + dulint archived_lsn; + dulint next_archived_lsn; + ulint write_offset; + ulint fold; + byte* buf; + ulint i; + + ut_ad(mutex_own(&(log_sys->mutex))); + ut_ad(LOG_CHECKPOINT_SIZE <= OS_FILE_LOG_BLOCK_SIZE); + + buf = group->checkpoint_buf; + + mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no); + mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys->next_checkpoint_lsn); + + mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET, + log_group_calc_lsn_offset( + log_sys->next_checkpoint_lsn, group)); + + mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, log_sys->buf_size); + + if (log_sys->archiving_state == LOG_ARCH_OFF) { + archived_lsn = ut_dulint_max; + } else { + archived_lsn = log_sys->archived_lsn; + + if (0 != ut_dulint_cmp(archived_lsn, + log_sys->next_archived_lsn)) { + next_archived_lsn = log_sys->next_archived_lsn; + /* For debugging only */ + } + } + + mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, archived_lsn); + + for (i = 0; i < LOG_MAX_N_GROUPS; i++) { + log_checkpoint_set_nth_group_info(buf, i, 0, 0); + } + + group2 = UT_LIST_GET_FIRST(log_sys->log_groups); + + while (group2) { + log_checkpoint_set_nth_group_info(buf, group2->id, + group2->archived_file_no, + group2->archived_offset); + + group2 = UT_LIST_GET_NEXT(log_groups, group2); + } + + fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1); + mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold); + + fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN, + LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN); + mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold); + + /* We alternate the physical place of the checkpoint info in the first + log file */ + + if (ut_dulint_get_low(log_sys->next_checkpoint_no) % 2 == 0) { + write_offset = LOG_CHECKPOINT_1; + } else { + write_offset = LOG_CHECKPOINT_2; + } + + if (log_do_write) { + if (log_sys->n_pending_checkpoint_writes == 0) { + + rw_lock_x_lock_gen(&(log_sys->checkpoint_lock), + LOG_CHECKPOINT); + } + + log_sys->n_pending_checkpoint_writes++; + + log_sys->n_log_ios++; + + /* We send as the last parameter the group machine address + added with 1, as we want to distinguish between a normal log + file write and a checkpoint field write */ + + fil_io(OS_FILE_WRITE | OS_FILE_LOG, FALSE, group->space_id, + write_offset / UNIV_PAGE_SIZE, + write_offset % UNIV_PAGE_SIZE, + OS_FILE_LOG_BLOCK_SIZE, + buf, ((byte*)group + 1)); + + ut_ad(((ulint)group & 0x1) == 0); + } +} + +/********************************************************** +Reads a checkpoint info from a log group header to log_sys->checkpoint_buf. */ + +void +log_group_read_checkpoint_info( +/*===========================*/ + log_group_t* group, /* in: log group */ + ulint field) /* in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */ +{ + ut_ad(mutex_own(&(log_sys->mutex))); + + log_sys->n_log_ios++; + + fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->space_id, + field / UNIV_PAGE_SIZE, field % UNIV_PAGE_SIZE, + OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL); +} + +/********************************************************** +Writes checkpoint info to groups. */ + +void +log_groups_write_checkpoint_info(void) +/*==================================*/ +{ + log_group_t* group; + + ut_ad(mutex_own(&(log_sys->mutex))); + + group = UT_LIST_GET_FIRST(log_sys->log_groups); + + while (group) { + log_group_checkpoint(group); + + group = UT_LIST_GET_NEXT(log_groups, group); + } +} + +/********************************************************** +Makes a checkpoint. Note that this function does not flush dirty +blocks from the buffer pool: it only checks what is lsn of the oldest +modification in the pool, and writes information about the lsn in +log files. Use log_make_checkpoint_at to flush also the pool. */ + +ibool +log_checkpoint( +/*===========*/ + /* out: TRUE if success, FALSE if a checkpoint + write was already running */ + ibool sync, /* in: TRUE if synchronous operation is + desired */ + ibool write_always) /* in: the function normally checks if the + the new checkpoint would have a greater + lsn than the previous one: if not, then no + physical write is done; by setting this + parameter TRUE, a physical write will always be + made to log files */ +{ + dulint oldest_lsn; + + if (recv_recovery_is_on()) { + recv_apply_hashed_log_recs(TRUE); + } + + fil_flush_file_spaces(FIL_TABLESPACE); + + mutex_enter(&(log_sys->mutex)); + + oldest_lsn = log_buf_pool_get_oldest_modification(); + + mutex_exit(&(log_sys->mutex)); + + /* Because log also contains headers and dummy log records, + if the buffer pool contains no dirty buffers, oldest_lsn + gets the value log_sys->lsn from the previous function, + and we must make sure that the log is flushed up to that + lsn. If there are dirty buffers in the buffer pool, then our + write-ahead-logging algorithm ensures that the log has been flushed + up to oldest_lsn. */ + + log_flush_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS); + + mutex_enter(&(log_sys->mutex)); + + if (!write_always && ut_dulint_cmp( + log_sys->last_checkpoint_lsn, oldest_lsn) >= 0) { + + mutex_exit(&(log_sys->mutex)); + + return(TRUE); + } + + ut_ad(ut_dulint_cmp(log_sys->written_to_all_lsn, oldest_lsn) >= 0); + + if (log_sys->n_pending_checkpoint_writes > 0) { + /* A checkpoint write is running */ + + mutex_exit(&(log_sys->mutex)); + + if (sync) { + /* Wait for the checkpoint write to complete */ + rw_lock_s_lock(&(log_sys->checkpoint_lock)); + rw_lock_s_unlock(&(log_sys->checkpoint_lock)); + } + + return(FALSE); + } + + log_sys->next_checkpoint_lsn = oldest_lsn; + + if (log_debug_writes) { + printf("Making checkpoint no %lu at lsn %lu %lu\n", + ut_dulint_get_low(log_sys->next_checkpoint_no), + ut_dulint_get_high(oldest_lsn), + ut_dulint_get_low(oldest_lsn)); + } + + log_groups_write_checkpoint_info(); + + mutex_exit(&(log_sys->mutex)); + + if (sync) { + /* Wait for the checkpoint write to complete */ + rw_lock_s_lock(&(log_sys->checkpoint_lock)); + rw_lock_s_unlock(&(log_sys->checkpoint_lock)); + } + + return(TRUE); +} + +/******************************************************************** +Makes a checkpoint at a given lsn or later. */ + +void +log_make_checkpoint_at( +/*===================*/ + dulint lsn, /* in: make a checkpoint at this or a later + lsn, if ut_dulint_max, makes a checkpoint at + the latest lsn */ + ibool write_always) /* in: the function normally checks if the + the new checkpoint would have a greater + lsn than the previous one: if not, then no + physical write is done; by setting this + parameter TRUE, a physical write will always be + made to log files */ +{ + ibool success; + + /* Preflush pages synchronously */ + + success = FALSE; + + while (!success) { + success = log_preflush_pool_modified_pages(lsn, TRUE); + } + + success = FALSE; + + while (!success) { + success = log_checkpoint(TRUE, write_always); + } +} + +/******************************************************************** +Tries to establish a big enough margin of free space in the log groups, such +that a new log entry can be catenated without an immediate need for a +checkpoint. NOTE: this function may only be called if the calling thread +owns no synchronization objects! */ +static +void +log_checkpoint_margin(void) +/*=======================*/ +{ + log_t* log = log_sys; + ulint age; + ulint checkpoint_age; + ulint advance; + dulint oldest_lsn; + dulint new_oldest; + ibool do_preflush; + ibool sync; + ibool checkpoint_sync; + ibool do_checkpoint; + ibool success; +loop: + sync = FALSE; + checkpoint_sync = FALSE; + do_preflush = FALSE; + do_checkpoint = FALSE; + + mutex_enter(&(log->mutex)); + + if (log->check_flush_or_checkpoint == FALSE) { + mutex_exit(&(log->mutex)); + + return; + } + + oldest_lsn = log_buf_pool_get_oldest_modification(); + + age = ut_dulint_minus(log->lsn, oldest_lsn); + + if (age > log->max_modified_age_sync) { + + /* A flush is urgent: we have to do a synchronous preflush */ + + sync = TRUE; + + advance = 2 * (age - log->max_modified_age_sync); + + new_oldest = ut_dulint_add(oldest_lsn, advance); + + do_preflush = TRUE; + + } else if (age > log->max_modified_age_async) { + + /* A flush is not urgent: we do an asynchronous preflush */ + advance = age - log->max_modified_age_async; + + new_oldest = ut_dulint_add(oldest_lsn, advance); + + do_preflush = TRUE; + } + + checkpoint_age = ut_dulint_minus(log->lsn, log->last_checkpoint_lsn); + + if (checkpoint_age > log->max_checkpoint_age) { + /* A checkpoint is urgent: we do it synchronously */ + + checkpoint_sync = TRUE; + + do_checkpoint = TRUE; + + } else if (checkpoint_age > log->max_checkpoint_age_async) { + /* A checkpoint is not urgent: do it asynchronously */ + + do_checkpoint = TRUE; + + log->check_flush_or_checkpoint = FALSE; + } else { + log->check_flush_or_checkpoint = FALSE; + } + + mutex_exit(&(log->mutex)); + + if (do_preflush) { + success = log_preflush_pool_modified_pages(new_oldest, sync); + + /* If the flush succeeded, this thread has done its part + and can proceed. If it did not succeed, there was another + thread doing a flush at the same time. If sync was FALSE, + the flush was not urgent, and we let this thread proceed. + Otherwise, we let it start from the beginning again. */ + + if (sync && !success) { + mutex_enter(&(log->mutex)); + + log->check_flush_or_checkpoint = TRUE; + + mutex_exit(&(log->mutex)); + goto loop; + } + } + + if (do_checkpoint) { + log_checkpoint(checkpoint_sync, FALSE); + + if (checkpoint_sync) { + + goto loop; + } + } +} + +/********************************************************** +Reads a specified log segment to a buffer. */ + +void +log_group_read_log_seg( +/*===================*/ + ulint type, /* in: LOG_ARCHIVE or LOG_RECOVER */ + byte* buf, /* in: buffer where to read */ + log_group_t* group, /* in: log group */ + dulint start_lsn, /* in: read area start */ + dulint end_lsn) /* in: read area end */ +{ + ulint len; + ulint source_offset; + ibool sync; + + ut_ad(mutex_own(&(log_sys->mutex))); + + sync = FALSE; + + if (type == LOG_RECOVER) { + sync = TRUE; + } +loop: + source_offset = log_group_calc_lsn_offset(start_lsn, group); + + len = ut_dulint_minus(end_lsn, start_lsn); + + ut_ad(len != 0); + + if ((source_offset % group->file_size) + len > group->file_size) { + + len = group->file_size - (source_offset % group->file_size); + } + + if (type == LOG_ARCHIVE) { + + log_sys->n_pending_archive_ios++; + } + + log_sys->n_log_ios++; + + fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id, + source_offset / UNIV_PAGE_SIZE, source_offset % UNIV_PAGE_SIZE, + len, buf, &log_archive_io); + + start_lsn = ut_dulint_add(start_lsn, len); + buf += len; + + if (ut_dulint_cmp(start_lsn, end_lsn) != 0) { + + goto loop; + } +} + +/********************************************************** +Generates an archived log file name. */ + +void +log_archived_file_name_gen( +/*=======================*/ + char* buf, /* in: buffer where to write */ + ulint id, /* in: group id */ + ulint file_no)/* in: file number */ +{ + UT_NOT_USED(id); /* Currently we only archive the first group */ + + sprintf(buf, "%sib_arch_log_%010lu", srv_arch_dir, file_no); +} + +/********************************************************** +Writes a log file header to a log file space. */ +static +void +log_group_archive_file_header_write( +/*================================*/ + log_group_t* group, /* in: log group */ + ulint nth_file, /* in: header to the nth file in the + archive log file space */ + ulint file_no, /* in: archived file number */ + dulint start_lsn) /* in: log file data starts at this + lsn */ +{ + byte* buf; + ulint dest_offset; + + ut_ad(mutex_own(&(log_sys->mutex))); + + ut_a(nth_file < group->n_files); + + buf = *(group->archive_file_header_bufs + nth_file); + + mach_write_to_4(buf + LOG_GROUP_ID, group->id); + mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn); + mach_write_to_4(buf + LOG_FILE_NO, file_no); + + mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, FALSE); + + dest_offset = nth_file * group->file_size; + + log_sys->n_log_ios++; + + fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->archive_space_id, + dest_offset / UNIV_PAGE_SIZE, + dest_offset % UNIV_PAGE_SIZE, + 2 * OS_FILE_LOG_BLOCK_SIZE, + buf, &log_archive_io); +} + +/********************************************************** +Writes a log file header to a completed archived log file. */ +static +void +log_group_archive_completed_header_write( +/*=====================================*/ + log_group_t* group, /* in: log group */ + ulint nth_file, /* in: header to the nth file in the + archive log file space */ + dulint end_lsn) /* in: end lsn of the file */ +{ + byte* buf; + ulint dest_offset; + + ut_ad(mutex_own(&(log_sys->mutex))); + ut_a(nth_file < group->n_files); + + buf = *(group->archive_file_header_bufs + nth_file); + + mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, TRUE); + mach_write_to_8(buf + LOG_FILE_END_LSN, end_lsn); + + dest_offset = nth_file * group->file_size + LOG_FILE_ARCH_COMPLETED; + + log_sys->n_log_ios++; + + fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->archive_space_id, + dest_offset / UNIV_PAGE_SIZE, + dest_offset % UNIV_PAGE_SIZE, + OS_FILE_LOG_BLOCK_SIZE, + buf + LOG_FILE_ARCH_COMPLETED, + &log_archive_io); +} + +/********************************************************** +Does the archive writes for a single log group. */ +static +void +log_group_archive( +/*==============*/ + log_group_t* group) /* in: log group */ +{ + os_file_t file_handle; + dulint start_lsn; + dulint end_lsn; + char name[100]; + byte* buf; + ulint len; + ibool ret; + ulint next_offset; + ulint n_files; + ulint open_mode; + + ut_ad(mutex_own(&(log_sys->mutex))); + + start_lsn = log_sys->archived_lsn; + + ut_ad(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0); + + end_lsn = log_sys->next_archived_lsn; + + ut_ad(ut_dulint_get_low(end_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0); + + buf = log_sys->archive_buf; + + n_files = 0; + + next_offset = group->archived_offset; +loop: + if ((next_offset % group->file_size == 0) + || (fil_space_get_size(group->archive_space_id) == 0)) { + + /* Add the file to the archive file space; create or open the + file */ + + if (next_offset % group->file_size == 0) { + open_mode = OS_FILE_CREATE; + } else { + open_mode = OS_FILE_OPEN; + } + + log_archived_file_name_gen(name, group->id, + group->archived_file_no + n_files); + fil_reserve_right_to_open(); + + file_handle = os_file_create(name, open_mode, OS_FILE_AIO, + &ret); + if (!ret && (open_mode == OS_FILE_CREATE)) { + file_handle = os_file_create(name, OS_FILE_OPEN, + OS_FILE_AIO, &ret); + } + + ut_a(ret); + + if (log_debug_writes) { + printf("Created archive file %s\n", name); + } + + ret = os_file_close(file_handle); + + ut_a(ret); + + fil_release_right_to_open(); + + /* Add the archive file as a node to the space */ + + fil_node_create(name, group->file_size / UNIV_PAGE_SIZE, + group->archive_space_id); + + if (next_offset % group->file_size == 0) { + log_group_archive_file_header_write(group, n_files, + group->archived_file_no + n_files, + start_lsn); + + next_offset += LOG_FILE_HDR_SIZE; + } + } + + len = ut_dulint_minus(end_lsn, start_lsn); + + if (group->file_size < (next_offset % group->file_size) + len) { + + len = group->file_size - (next_offset % group->file_size); + } + + if (log_debug_writes) { + printf( + "Archiving starting at lsn %lu %lu, len %lu to group %lu\n", + ut_dulint_get_high(start_lsn), + ut_dulint_get_low(start_lsn), + len, group->id); + } + + log_sys->n_pending_archive_ios++; + + log_sys->n_log_ios++; + + fil_io(OS_FILE_WRITE | OS_FILE_LOG, FALSE, group->archive_space_id, + next_offset / UNIV_PAGE_SIZE, next_offset % UNIV_PAGE_SIZE, + ut_calc_align(len, OS_FILE_LOG_BLOCK_SIZE), buf, + &log_archive_io); + + start_lsn = ut_dulint_add(start_lsn, len); + next_offset += len; + buf += len; + + if (next_offset % group->file_size == 0) { + n_files++; + } + + if (ut_dulint_cmp(end_lsn, start_lsn) != 0) { + + goto loop; + } + + group->next_archived_file_no = group->archived_file_no + n_files; + group->next_archived_offset = next_offset % group->file_size; + + ut_ad(group->next_archived_offset % OS_FILE_LOG_BLOCK_SIZE == 0); +} + +/********************************************************* +(Writes to the archive of each log group.) Currently, only the first +group is archived. */ +static +void +log_archive_groups(void) +/*====================*/ +{ + log_group_t* group; + + ut_ad(mutex_own(&(log_sys->mutex))); + + group = UT_LIST_GET_FIRST(log_sys->log_groups); + + log_group_archive(group); +} + +/********************************************************* +Completes the archiving write phase for (each log group), currently, +the first log group. */ +static +void +log_archive_write_complete_groups(void) +/*===================================*/ +{ + log_group_t* group; + ulint end_offset; + ulint trunc_files; + ulint n_files; + dulint start_lsn; + dulint end_lsn; + ulint i; + + ut_ad(mutex_own(&(log_sys->mutex))); + + group = UT_LIST_GET_FIRST(log_sys->log_groups); + + group->archived_file_no = group->next_archived_file_no; + group->archived_offset = group->next_archived_offset; + + /* Truncate from the archive file space all but the last + file, or if it has been written full, all files */ + + n_files = (UNIV_PAGE_SIZE + * fil_space_get_size(group->archive_space_id)) + / group->file_size; + ut_ad(n_files > 0); + + end_offset = group->archived_offset; + + if (end_offset % group->file_size == 0) { + + trunc_files = n_files; + } else { + trunc_files = n_files - 1; + } + + if (log_debug_writes && trunc_files) { + printf("Complete file(s) archived to group %lu\n", + group->id); + } + + /* Calculate the archive file space start lsn */ + start_lsn = ut_dulint_subtract(log_sys->next_archived_lsn, + end_offset - LOG_FILE_HDR_SIZE + + trunc_files + * (group->file_size - LOG_FILE_HDR_SIZE)); + end_lsn = start_lsn; + + for (i = 0; i < trunc_files; i++) { + + end_lsn = ut_dulint_add(end_lsn, + group->file_size - LOG_FILE_HDR_SIZE); + + /* Write a notice to the headers of archived log + files that the file write has been completed */ + + log_group_archive_completed_header_write(group, i, end_lsn); + } + + fil_space_truncate_start(group->archive_space_id, + trunc_files * group->file_size); + + if (log_debug_writes) { + printf("Archiving writes completed\n"); + } +} + +/********************************************************** +Completes an archiving i/o. */ +static +void +log_archive_check_completion_low(void) +/*==================================*/ +{ + ut_ad(mutex_own(&(log_sys->mutex))); + + if (log_sys->n_pending_archive_ios == 0 + && log_sys->archiving_phase == LOG_ARCHIVE_READ) { + + if (log_debug_writes) { + printf("Archiving read completed\n"); + } + + /* Archive buffer has now been read in: start archive writes */ + + log_sys->archiving_phase = LOG_ARCHIVE_WRITE; + + log_archive_groups(); + } + + if (log_sys->n_pending_archive_ios == 0 + && log_sys->archiving_phase == LOG_ARCHIVE_WRITE) { + + log_archive_write_complete_groups(); + + log_sys->archived_lsn = log_sys->next_archived_lsn; + + rw_lock_x_unlock_gen(&(log_sys->archive_lock), LOG_ARCHIVE); + } +} + +/********************************************************** +Completes an archiving i/o. */ +static +void +log_io_complete_archive(void) +/*=========================*/ +{ + log_group_t* group; + + mutex_enter(&(log_sys->mutex)); + + group = UT_LIST_GET_FIRST(log_sys->log_groups); + + mutex_exit(&(log_sys->mutex)); + + fil_flush(group->archive_space_id); + + mutex_enter(&(log_sys->mutex)); + + ut_ad(log_sys->n_pending_archive_ios > 0); + + log_sys->n_pending_archive_ios--; + + log_archive_check_completion_low(); + + mutex_exit(&(log_sys->mutex)); +} + +/************************************************************************ +Starts an archiving operation. */ + +ibool +log_archive_do( +/*===========*/ + /* out: TRUE if succeed, FALSE if an archiving + operation was already running */ + ibool sync, /* in: TRUE if synchronous operation is desired */ + ulint* n_bytes)/* out: archive log buffer size, 0 if nothing to + archive */ +{ + ibool calc_new_limit; + dulint start_lsn; + dulint limit_lsn; + + calc_new_limit = TRUE; +loop: + mutex_enter(&(log_sys->mutex)); + + if (log_sys->archiving_state == LOG_ARCH_OFF) { + mutex_exit(&(log_sys->mutex)); + + *n_bytes = 0; + + return(TRUE); + + } else if (log_sys->archiving_state == LOG_ARCH_STOPPED + || log_sys->archiving_state == LOG_ARCH_STOPPING2) { + + mutex_exit(&(log_sys->mutex)); + + os_event_wait(log_sys->archiving_on); + + mutex_enter(&(log_sys->mutex)); + + goto loop; + } + + start_lsn = log_sys->archived_lsn; + + if (calc_new_limit) { + ut_ad(log_sys->archive_buf_size % OS_FILE_LOG_BLOCK_SIZE == 0); + + limit_lsn = ut_dulint_add(start_lsn, + log_sys->archive_buf_size); + + *n_bytes = log_sys->archive_buf_size; + + if (ut_dulint_cmp(limit_lsn, log_sys->lsn) >= 0) { + + limit_lsn = ut_dulint_align_down(log_sys->lsn, + OS_FILE_LOG_BLOCK_SIZE); + } + } + + if (ut_dulint_cmp(log_sys->archived_lsn, limit_lsn) >= 0) { + + mutex_exit(&(log_sys->mutex)); + + *n_bytes = 0; + + return(TRUE); + } + + if (ut_dulint_cmp(log_sys->written_to_all_lsn, limit_lsn) < 0) { + + mutex_exit(&(log_sys->mutex)); + + log_flush_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS); + + calc_new_limit = FALSE; + + goto loop; + } + + if (log_sys->n_pending_archive_ios > 0) { + /* An archiving operation is running */ + + mutex_exit(&(log_sys->mutex)); + + if (sync) { + rw_lock_s_lock(&(log_sys->archive_lock)); + rw_lock_s_unlock(&(log_sys->archive_lock)); + } + + *n_bytes = log_sys->archive_buf_size; + + return(FALSE); + } + + rw_lock_x_lock_gen(&(log_sys->archive_lock), LOG_ARCHIVE); + + log_sys->archiving_phase = LOG_ARCHIVE_READ; + + log_sys->next_archived_lsn = limit_lsn; + + if (log_debug_writes) { + printf("Archiving from lsn %lu %lu to lsn %lu %lu\n", + ut_dulint_get_high(log_sys->archived_lsn), + ut_dulint_get_low(log_sys->archived_lsn), + ut_dulint_get_high(limit_lsn), + ut_dulint_get_low(limit_lsn)); + } + + /* Read the log segment to the archive buffer */ + + log_group_read_log_seg(LOG_ARCHIVE, log_sys->archive_buf, + UT_LIST_GET_FIRST(log_sys->log_groups), + start_lsn, limit_lsn); + + mutex_exit(&(log_sys->mutex)); + + if (sync) { + rw_lock_s_lock(&(log_sys->archive_lock)); + rw_lock_s_unlock(&(log_sys->archive_lock)); + } + + *n_bytes = log_sys->archive_buf_size; + + return(TRUE); +} + +/******************************************************************** +Writes the log contents to the archive at least up to the lsn when this +function was called. */ +static +void +log_archive_all(void) +/*=================*/ +{ + dulint present_lsn; + ulint dummy; + + mutex_enter(&(log_sys->mutex)); + + if (log_sys->archiving_state == LOG_ARCH_OFF) { + mutex_exit(&(log_sys->mutex)); + + return; + } + + present_lsn = log_sys->lsn; + + mutex_exit(&(log_sys->mutex)); + + log_pad_current_log_block(); + + for (;;) { + mutex_enter(&(log_sys->mutex)); + + if (ut_dulint_cmp(present_lsn, log_sys->archived_lsn) <= 0) { + + mutex_exit(&(log_sys->mutex)); + + return; + } + + mutex_exit(&(log_sys->mutex)); + + log_archive_do(TRUE, &dummy); + } +} + +/********************************************************* +Closes the possible open archive log file (for each group) the first group, +and if it was open, increments the group file count by 2, if desired. */ +static +void +log_archive_close_groups( +/*=====================*/ + ibool increment_file_count) /* in: TRUE if we want to increment + the file count */ +{ + log_group_t* group; + ulint trunc_len; + + ut_ad(mutex_own(&(log_sys->mutex))); + + group = UT_LIST_GET_FIRST(log_sys->log_groups); + + trunc_len = UNIV_PAGE_SIZE + * fil_space_get_size(group->archive_space_id); + + if (trunc_len > 0) { + ut_a(trunc_len == group->file_size); + + /* Write a notice to the headers of archived log + files that the file write has been completed */ + + log_group_archive_completed_header_write(group, + 0, log_sys->archived_lsn); + + fil_space_truncate_start(group->archive_space_id, + trunc_len); + if (increment_file_count) { + group->archived_offset = 0; + group->archived_file_no += 2; + } + + if (log_debug_writes) { + printf( + "Incrementing arch file no to %lu in log group %lu\n", + group->archived_file_no + 2, group->id); + } + } +} + +/******************************************************************** +Writes the log contents to the archive up to the lsn when this function was +called, and stops the archiving. When archiving is started again, the archived +log file numbers start from 2 higher, so that the archiving will +not write again to the archived log files which exist when this function +returns. */ + +ulint +log_archive_stop(void) +/*==================*/ + /* out: DB_SUCCESS or DB_ERROR */ +{ + ibool success; + + mutex_enter(&(log_sys->mutex)); + + if (log_sys->archiving_state != LOG_ARCH_ON) { + + mutex_exit(&(log_sys->mutex)); + + return(DB_ERROR); + } + + log_sys->archiving_state = LOG_ARCH_STOPPING; + + mutex_exit(&(log_sys->mutex)); + + log_archive_all(); + + mutex_enter(&(log_sys->mutex)); + + log_sys->archiving_state = LOG_ARCH_STOPPING2; + os_event_reset(log_sys->archiving_on); + + mutex_exit(&(log_sys->mutex)); + + /* Wait for a possible archiving operation to end */ + + rw_lock_s_lock(&(log_sys->archive_lock)); + rw_lock_s_unlock(&(log_sys->archive_lock)); + + mutex_enter(&(log_sys->mutex)); + + /* Close all archived log files, incrementing the file count by 2, + if appropriate */ + + log_archive_close_groups(TRUE); + + mutex_exit(&(log_sys->mutex)); + + /* Make a checkpoint, so that if recovery is needed, the file numbers + of new archived log files will start from the right value */ + + success = FALSE; + + while (!success) { + success = log_checkpoint(TRUE, TRUE); + } + + mutex_enter(&(log_sys->mutex)); + + log_sys->archiving_state = LOG_ARCH_STOPPED; + + mutex_exit(&(log_sys->mutex)); + + return(DB_SUCCESS); +} + +/******************************************************************** +Starts again archiving which has been stopped. */ + +ulint +log_archive_start(void) +/*===================*/ + /* out: DB_SUCCESS or DB_ERROR */ +{ + mutex_enter(&(log_sys->mutex)); + + if (log_sys->archiving_state != LOG_ARCH_STOPPED) { + + mutex_exit(&(log_sys->mutex)); + + return(DB_ERROR); + } + + log_sys->archiving_state = LOG_ARCH_ON; + + os_event_set(log_sys->archiving_on); + + mutex_exit(&(log_sys->mutex)); + + return(DB_SUCCESS); +} + +/******************************************************************** +Stop archiving the log so that a gap may occur in the archived log files. */ + +ulint +log_archive_noarchivelog(void) +/*==========================*/ + /* out: DB_SUCCESS or DB_ERROR */ +{ +loop: + mutex_enter(&(log_sys->mutex)); + + if (log_sys->archiving_state == LOG_ARCH_STOPPED + || log_sys->archiving_state == LOG_ARCH_OFF) { + + log_sys->archiving_state = LOG_ARCH_OFF; + + os_event_set(log_sys->archiving_on); + + mutex_exit(&(log_sys->mutex)); + + return(DB_SUCCESS); + } + + mutex_exit(&(log_sys->mutex)); + + log_archive_stop(); + + os_thread_sleep(500000); + + goto loop; +} + +/******************************************************************** +Start archiving the log so that a gap may occur in the archived log files. */ + +ulint +log_archive_archivelog(void) +/*========================*/ + /* out: DB_SUCCESS or DB_ERROR */ +{ + mutex_enter(&(log_sys->mutex)); + + if (log_sys->archiving_state == LOG_ARCH_OFF) { + + log_sys->archiving_state = LOG_ARCH_ON; + + log_sys->archived_lsn = ut_dulint_align_down(log_sys->lsn, + OS_FILE_LOG_BLOCK_SIZE); + mutex_exit(&(log_sys->mutex)); + + return(DB_SUCCESS); + } + + mutex_exit(&(log_sys->mutex)); + + return(DB_ERROR); +} + +/******************************************************************** +Tries to establish a big enough margin of free space in the log groups, such +that a new log entry can be catenated without an immediate need for +archiving. */ +static +void +log_archive_margin(void) +/*====================*/ +{ + log_t* log = log_sys; + ulint age; + ibool sync; + ulint dummy; +loop: + mutex_enter(&(log->mutex)); + + if (log->archiving_state == LOG_ARCH_OFF) { + mutex_exit(&(log->mutex)); + + return; + } + + age = ut_dulint_minus(log->lsn, log->archived_lsn); + + if (age > log->max_archived_lsn_age) { + + /* An archiving is urgent: we have to do synchronous i/o */ + + sync = TRUE; + + } else if (age > log->max_archived_lsn_age_async) { + + /* An archiving is not urgent: we do asynchronous i/o */ + + sync = FALSE; + } else { + /* No archiving required yet */ + + mutex_exit(&(log->mutex)); + + return; + } + + mutex_exit(&(log->mutex)); + + log_archive_do(sync, &dummy); + + if (sync == TRUE) { + /* Check again that enough was written to the archive */ + + goto loop; + } +} + +/************************************************************************ +Checks that there is enough free space in the log to start a new query step. +Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this +function may only be called if the calling thread owns no synchronization +objects! */ + +void +log_check_margins(void) +/*===================*/ +{ +loop: + log_flush_margin(); + + log_checkpoint_margin(); + + log_archive_margin(); + + mutex_enter(&(log_sys->mutex)); + + if (log_sys->check_flush_or_checkpoint) { + + mutex_exit(&(log_sys->mutex)); + + goto loop; + } + + mutex_exit(&(log_sys->mutex)); +} + +/********************************************************** +Switches the database to the online backup state. */ + +ulint +log_switch_backup_state_on(void) +/*============================*/ + /* out: DB_SUCCESS or DB_ERROR */ +{ + dulint backup_lsn; + + mutex_enter(&(log_sys->mutex)); + + if (log_sys->online_backup_state) { + + /* The database is already in that state */ + + mutex_exit(&(log_sys->mutex)); + + return(DB_ERROR); + } + + log_sys->online_backup_state = TRUE; + + backup_lsn = log_sys->lsn; + + log_sys->online_backup_lsn = backup_lsn; + + mutex_exit(&(log_sys->mutex)); + + /* log_checkpoint_and_mark_file_spaces(); */ + + return(DB_SUCCESS); +} + +/********************************************************** +Switches the online backup state off. */ + +ulint +log_switch_backup_state_off(void) +/*=============================*/ + /* out: DB_SUCCESS or DB_ERROR */ +{ + mutex_enter(&(log_sys->mutex)); + + if (!log_sys->online_backup_state) { + + /* The database is already in that state */ + + mutex_exit(&(log_sys->mutex)); + + return(DB_ERROR); + } + + log_sys->online_backup_state = FALSE; + + mutex_exit(&(log_sys->mutex)); + + return(DB_SUCCESS); +} + +/******************************************************************** +Makes a checkpoint at the latest lsn and writes it to first page of each +data file in the database, so that we know that the file spaces contain +all modifications up to that lsn. This can only be called at database +shutdown. This function also writes all log in log files to the log archive. */ + +void +logs_empty_and_mark_files_at_shutdown(void) +/*=======================================*/ +{ + dulint lsn; + ulint arch_log_no; + + fprintf(stderr, "Innobase: Starting shutdown...\n"); + + /* Wait until the master thread and all other operations are idle: our + algorithm only works if the server is idle at shutdown */ +loop: + os_thread_sleep(100000); + + mutex_enter(&kernel_mutex); + + if (trx_n_mysql_transactions > 0 + || UT_LIST_GET_LEN(trx_sys->trx_list) > 0) { + + mutex_exit(&kernel_mutex); + + goto loop; + } + + if (srv_n_threads_active[SRV_MASTER] != 0) { + + mutex_exit(&kernel_mutex); + + goto loop; + } + + mutex_exit(&kernel_mutex); + + mutex_enter(&(log_sys->mutex)); + + if (log_sys->n_pending_archive_ios + + log_sys->n_pending_checkpoint_writes + + log_sys->n_pending_writes > 0) { + + mutex_exit(&(log_sys->mutex)); + + goto loop; + } + + mutex_exit(&(log_sys->mutex)); + + if (!buf_pool_check_no_pending_io()) { + + goto loop; + } + + log_archive_all(); + + log_make_checkpoint_at(ut_dulint_max, TRUE); + + mutex_enter(&(log_sys->mutex)); + + lsn = log_sys->lsn; + + if (ut_dulint_cmp(lsn, log_sys->last_checkpoint_lsn) != 0 + || (srv_log_archive_on + && ut_dulint_cmp(lsn, + ut_dulint_add(log_sys->archived_lsn, LOG_BLOCK_HDR_SIZE)) != 0)) { + + mutex_exit(&(log_sys->mutex)); + + goto loop; + } + + arch_log_no = + UT_LIST_GET_FIRST(log_sys->log_groups)->archived_file_no; + + if (0 == UT_LIST_GET_FIRST(log_sys->log_groups)->archived_offset) { + + arch_log_no--; + } + + log_archive_close_groups(TRUE); + + mutex_exit(&(log_sys->mutex)); + + fil_flush_file_spaces(FIL_TABLESPACE); + fil_flush_file_spaces(FIL_LOG); + + /* The following fil_write_... will pass the buffer pool: therefore + it is essential that the buffer pool has been completely flushed + to disk! */ + + if (!buf_all_freed()) { + + goto loop; + } + + fil_write_flushed_lsn_to_data_files(lsn, arch_log_no); + + fil_flush_file_spaces(FIL_TABLESPACE); + + fprintf(stderr, "Innobase: Shutdown completed\n"); +} + +/********************************************************** +Checks by parsing that the catenated log segment for a single mtr is +consistent. */ + +ibool +log_check_log_recs( +/*===============*/ + byte* buf, /* in: pointer to the start of the log segment + in the log_sys->buf log buffer */ + ulint len, /* in: segment length in bytes */ + dulint buf_start_lsn) /* in: buffer start lsn */ +{ + dulint contiguous_lsn; + dulint scanned_lsn; + byte* start; + byte* end; + byte* buf1; + byte* scan_buf; + + ut_ad(mutex_own(&(log_sys->mutex))); + + if (len == 0) { + + return(TRUE); + } + + start = ut_align_down(buf, OS_FILE_LOG_BLOCK_SIZE); + end = ut_align(buf + len, OS_FILE_LOG_BLOCK_SIZE); + + buf1 = mem_alloc((end - start) + OS_FILE_LOG_BLOCK_SIZE); + scan_buf = ut_align(buf1, OS_FILE_LOG_BLOCK_SIZE); + + ut_memcpy(scan_buf, start, end - start); + + recv_scan_log_recs(FALSE, scan_buf, end - start, + ut_dulint_align_down(buf_start_lsn, + OS_FILE_LOG_BLOCK_SIZE), + &contiguous_lsn, &scanned_lsn); + + ut_a(ut_dulint_cmp(scanned_lsn, ut_dulint_add(buf_start_lsn, len)) + == 0); + ut_a(ut_dulint_cmp(recv_sys->recovered_lsn, scanned_lsn) == 0); + + mem_free(buf1); + + return(TRUE); +} + +/********************************************************** +Prints info of the log. */ + +void +log_print(void) +/*===========*/ +{ + printf("Log sequence number %lu %lu\n", + ut_dulint_get_high(log_sys->lsn), + ut_dulint_get_low(log_sys->lsn)); +} + |