diff options
Diffstat (limited to 'innobase/log/log0log.c')
-rw-r--r-- | innobase/log/log0log.c | 207 |
1 files changed, 164 insertions, 43 deletions
diff --git a/innobase/log/log0log.c b/innobase/log/log0log.c index 2ba035d1eb2..d6e9deaa151 100644 --- a/innobase/log/log0log.c +++ b/innobase/log/log0log.c @@ -162,6 +162,8 @@ log_reserve_and_open( ulint archived_lsn_age; ulint count = 0; ulint dummy; + + ut_a(len < log->buf_size / 2); loop: mutex_enter(&(log->mutex)); @@ -663,6 +665,8 @@ log_init(void) log_sys->buf_next_to_write = 0; + log_sys->flush_lsn = ut_dulint_zero; + log_sys->written_to_some_lsn = log_sys->lsn; log_sys->written_to_all_lsn = log_sys->lsn; @@ -777,9 +781,15 @@ log_group_init( *(group->file_header_bufs + i) = ut_align( mem_alloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE), OS_FILE_LOG_BLOCK_SIZE); + + memset(*(group->file_header_bufs + i), '\0', + LOG_FILE_HDR_SIZE); + *(group->archive_file_header_bufs + i) = ut_align( mem_alloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE), OS_FILE_LOG_BLOCK_SIZE); + memset(*(group->archive_file_header_bufs + i), '\0', + LOG_FILE_HDR_SIZE); } group->archive_space_id = archive_space_id; @@ -791,6 +801,8 @@ log_group_init( mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE), OS_FILE_LOG_BLOCK_SIZE); + memset(group->checkpoint_buf, '\0', OS_FILE_LOG_BLOCK_SIZE); + UT_LIST_ADD_LAST(log_groups, log_sys->log_groups, group); ut_a(log_calc_max_ages()); @@ -839,7 +851,7 @@ log_group_check_flush_completion( { ut_ad(mutex_own(&(log_sys->mutex))); - if (!log_sys->one_flushed && (group->n_pending_writes == 0)) { + if (!log_sys->one_flushed && group->n_pending_writes == 0) { if (log_debug_writes) { printf("Log flushed first to group %lu\n", group->id); @@ -933,16 +945,20 @@ log_io_complete( return; } + ut_a(0); /* We currently use synchronous writing of the + logs and cannot end up here! */ + if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC - && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) { + && srv_unix_file_flush_method != SRV_UNIX_NOSYNC + && srv_flush_log_at_trx_commit != 2) { fil_flush(group->space_id); } mutex_enter(&(log_sys->mutex)); - ut_ad(group->n_pending_writes > 0); - ut_ad(log_sys->n_pending_writes > 0); + ut_a(group->n_pending_writes > 0); + ut_a(log_sys->n_pending_writes > 0); group->n_pending_writes--; log_sys->n_pending_writes--; @@ -956,6 +972,57 @@ log_io_complete( } /********************************************************** +Flushes the log files to the disk, using, for example, the Unix fsync. +This function does the flush even if the user has set +srv_flush_log_at_trx_commit = FALSE. */ + +void +log_flush_to_disk(void) +/*===================*/ +{ + log_group_t* group; +loop: + mutex_enter(&(log_sys->mutex)); + + if (log_sys->n_pending_writes > 0) { + /* A log file write is running */ + + mutex_exit(&(log_sys->mutex)); + + /* Wait for the log file write to complete and try again */ + + os_event_wait(log_sys->no_flush_event); + + goto loop; + } + + group = UT_LIST_GET_FIRST(log_sys->log_groups); + + log_sys->n_pending_writes++; + group->n_pending_writes++; + + os_event_reset(log_sys->no_flush_event); + os_event_reset(log_sys->one_flushed_event); + + mutex_exit(&(log_sys->mutex)); + + fil_flush(group->space_id); + + mutex_enter(&(log_sys->mutex)); + + ut_a(group->n_pending_writes == 1); + ut_a(log_sys->n_pending_writes == 1); + + group->n_pending_writes--; + log_sys->n_pending_writes--; + + os_event_set(log_sys->no_flush_event); + os_event_set(log_sys->one_flushed_event); + + mutex_exit(&(log_sys->mutex)); +} + +/********************************************************** Writes a log file header to a log file space. */ static void @@ -970,7 +1037,6 @@ log_group_file_header_flush( { byte* buf; ulint dest_offset; - ibool sync; ut_ad(mutex_own(&(log_sys->mutex))); @@ -981,15 +1047,11 @@ log_group_file_header_flush( mach_write_to_4(buf + LOG_GROUP_ID, group->id); mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn); - dest_offset = nth_file * group->file_size; + /* Wipe over possible label of ibbackup --restore */ + memcpy(buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, " ", 4); - sync = FALSE; - - if (type == LOG_RECOVER) { + dest_offset = nth_file * group->file_size; - sync = TRUE; - } - if (log_debug_writes) { printf( "Writing log file header to group %lu file %lu\n", group->id, @@ -997,14 +1059,9 @@ log_group_file_header_flush( } if (log_do_write) { - if (type == LOG_FLUSH) { - log_sys->n_pending_writes++; - group->n_pending_writes++; - } - log_sys->n_log_ios++; - fil_io(OS_FILE_WRITE | OS_FILE_LOG, sync, group->space_id, + fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id, dest_offset / UNIV_PAGE_SIZE, dest_offset % UNIV_PAGE_SIZE, OS_FILE_LOG_BLOCK_SIZE, @@ -1013,6 +1070,31 @@ log_group_file_header_flush( } /********************************************************** +Stores a 1-byte checksum to the trailer checksum field of a log block +before writing it to a log file. This checksum is used in recovery to +check the consistency of a log block. The checksum is simply the 8 low +bits of 1 + the sum of the bytes in the log block except the trailer bytes. */ +static +void +log_block_store_checksum( +/*=====================*/ + byte* block) /* in/out: pointer to a log block */ +{ + ulint i; + ulint sum; + + sum = 1; + + for (i = 0; i < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE; i++) { + sum += (ulint)(*(block + i)); + } + + mach_write_to_1(block + OS_FILE_LOG_BLOCK_SIZE + - LOG_BLOCK_TRL_CHECKSUM, + 0xFF & sum); +} + +/********************************************************** Writes a buffer to a log file group. */ void @@ -1032,20 +1114,13 @@ log_group_write_buf( header */ { ulint write_len; - ibool sync; ibool write_header; ulint next_offset; + ulint i; ut_ad(mutex_own(&(log_sys->mutex))); - ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0); - ut_ad(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0); - - sync = FALSE; - - if (type == LOG_RECOVER) { - - sync = TRUE; - } + ut_a(len % OS_FILE_LOG_BLOCK_SIZE == 0); + ut_a(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0); if (new_data_offset == 0) { write_header = TRUE; @@ -1076,7 +1151,6 @@ loop: } if (log_debug_writes) { - ulint i; printf( "Writing log file segment to group %lu offset %lu len %lu\n" @@ -1100,15 +1174,17 @@ loop: } } - if (log_do_write) { - if (type == LOG_FLUSH) { - log_sys->n_pending_writes++; - group->n_pending_writes++; - } + /* Calculate the checksums for each log block and write them to + the trailer fields of the log blocks */ + + for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) { + log_block_store_checksum(buf + i * OS_FILE_LOG_BLOCK_SIZE); + } + if (log_do_write) { log_sys->n_log_ios++; - fil_io(OS_FILE_WRITE | OS_FILE_LOG, sync, group->space_id, + fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id, next_offset / UNIV_PAGE_SIZE, next_offset % UNIV_PAGE_SIZE, write_len, buf, group); } @@ -1126,15 +1202,15 @@ loop: /********************************************************** This function is called, e.g., when a transaction wants to commit. It checks -that the log has been flushed to disk up to the last log entry written by the -transaction. If there is a flush running, it waits and checks if the flush -flushed enough. If not, starts a new flush. */ +that the log has been written to the log file up to the last log entry written +by the transaction. If there is a flush running, it waits and checks if the +flush flushed enough. If not, starts a new flush. */ void log_flush_up_to( /*============*/ dulint lsn, /* in: log sequence number up to which the log should - be flushed, ut_dulint_max if not specified */ + be written, ut_dulint_max if not specified */ ulint wait) /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP, or LOG_WAIT_ALL_GROUPS */ { @@ -1144,6 +1220,7 @@ log_flush_up_to( ulint area_start; ulint area_end; ulint loop_count; + ulint unlock; if (recv_no_ibuf_operations) { /* Recovery is running and no operations on the log files are @@ -1209,6 +1286,12 @@ loop: ut_dulint_get_low(log_sys->lsn)); } + log_sys->n_pending_writes++; + + group = UT_LIST_GET_FIRST(log_sys->log_groups); + group->n_pending_writes++; /* We assume here that we have only + one log group! */ + os_event_reset(log_sys->no_flush_event); os_event_reset(log_sys->one_flushed_event); @@ -1254,6 +1337,36 @@ loop: group = UT_LIST_GET_NEXT(log_groups, group); } + mutex_exit(&(log_sys->mutex)); + + if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC + && srv_unix_file_flush_method != SRV_UNIX_NOSYNC + && srv_flush_log_at_trx_commit != 2) { + + group = UT_LIST_GET_FIRST(log_sys->log_groups); + + fil_flush(group->space_id); + } + + mutex_enter(&(log_sys->mutex)); + + group = UT_LIST_GET_FIRST(log_sys->log_groups); + + ut_a(group->n_pending_writes == 1); + ut_a(log_sys->n_pending_writes == 1); + + group->n_pending_writes--; + log_sys->n_pending_writes--; + + unlock = log_group_check_flush_completion(group); + unlock = unlock | log_sys_check_flush_completion(); + + log_flush_do_unlocks(unlock); + + mutex_exit(&(log_sys->mutex)); + + return; + do_waits: mutex_exit(&(log_sys->mutex)); @@ -1539,15 +1652,23 @@ log_reset_first_header_and_checkpoint( /*==================================*/ byte* hdr_buf,/* in: buffer which will be written to the start of the first log file */ - dulint lsn) /* in: lsn of the start of the first log file - + LOG_BLOCK_HDR_SIZE */ + dulint start) /* in: lsn of the start of the first log file; + we pretend that there is a checkpoint at + start + LOG_BLOCK_HDR_SIZE */ { ulint fold; byte* buf; - + dulint lsn; + mach_write_to_4(hdr_buf + LOG_GROUP_ID, 0); - mach_write_to_8(hdr_buf + LOG_FILE_START_LSN, lsn); + mach_write_to_8(hdr_buf + LOG_FILE_START_LSN, start); + + lsn = ut_dulint_add(start, LOG_BLOCK_HDR_SIZE); + /* Write the label of ibbackup --restore */ + sprintf(hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, "ibbackup "); + ut_sprintf_timestamp(hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP + + strlen("ibbackup ")); buf = hdr_buf + LOG_CHECKPOINT_1; mach_write_to_8(buf + LOG_CHECKPOINT_NO, ut_dulint_zero); |