Diffstat (limited to 'innobase/log/log0log.c')
-rw-r--r--	innobase/log/log0log.c	2781
1 file changed, 2781 insertions(+), 0 deletions(-)
diff --git a/innobase/log/log0log.c b/innobase/log/log0log.c
new file mode 100644
index 00000000000..c6fec44d128
--- /dev/null
+++ b/innobase/log/log0log.c
@@ -0,0 +1,2781 @@
+/******************************************************
+Database log
+
+(c) 1995-1997 Innobase Oy
+
+Created 12/9/1995 Heikki Tuuri
+*******************************************************/
+
+#include "log0log.h"
+
+#ifdef UNIV_NONINL
+#include "log0log.ic"
+#endif
+
+#include "mem0mem.h"
+#include "buf0buf.h"
+#include "buf0flu.h"
+#include "srv0srv.h"
+#include "log0recv.h"
+#include "fil0fil.h"
+#include "dict0boot.h"
+#include "srv0srv.h"
+#include "trx0sys.h"
+#include "trx0trx.h"
+
+/* Global log system variable */
+log_t* log_sys = NULL;
+
+ibool log_do_write = TRUE;
+ibool log_debug_writes = FALSE;
+
+/* Pointer to this variable is used as the i/o-message when we do i/o to an
+archive */
+byte log_archive_io;
+
+/* A margin for free space in the log buffer before a log entry is catenated */
+#define LOG_BUF_WRITE_MARGIN (4 * OS_FILE_LOG_BLOCK_SIZE)
+
+/* Margins for free space in the log buffer after a log entry is catenated */
+#define LOG_BUF_FLUSH_RATIO 2
+#define LOG_BUF_FLUSH_MARGIN (LOG_BUF_WRITE_MARGIN + 4 * UNIV_PAGE_SIZE)
+
+/* Margin for the free space in the smallest log group before a new query
+step which modifies the database is started */
+
+#define LOG_CHECKPOINT_FREE_PER_THREAD (4 * UNIV_PAGE_SIZE)
+#define LOG_CHECKPOINT_EXTRA_FREE (8 * UNIV_PAGE_SIZE)
+
+/* This parameter controls asynchronous making of a new checkpoint; the value
+should be bigger than LOG_POOL_PREFLUSH_RATIO_SYNC */
+
+#define LOG_POOL_CHECKPOINT_RATIO_ASYNC 32
+
+/* This parameter controls synchronous preflushing of modified buffer pages */
+#define LOG_POOL_PREFLUSH_RATIO_SYNC 16
+
+/* The same ratio for asynchronous preflushing; this value should be less than
+the previous */
+#define LOG_POOL_PREFLUSH_RATIO_ASYNC 8
+
+/* Extra margin, in addition to one log file, used in archiving */
+#define LOG_ARCHIVE_EXTRA_MARGIN (4 * UNIV_PAGE_SIZE)
+
+/* This parameter controls asynchronous writing to the archive */
+#define LOG_ARCHIVE_RATIO_ASYNC 16
+
+/* Codes used in unlocking flush latches */
+#define LOG_UNLOCK_NONE_FLUSHED_LOCK 1
+#define LOG_UNLOCK_FLUSH_LOCK 2
+
+/* States of an archiving operation */
+#define LOG_ARCHIVE_READ 1
+#define LOG_ARCHIVE_WRITE 2
+
+/**********************************************************
+Calculates the file count of an lsn within a log group. */
+static
+ulint
+log_group_calc_lsn_file_count(
+/*==========================*/
+ /* out: file count within the log group */
+ dulint lsn, /* in: lsn, must be within 4 GB of
+ group->next_block_lsn */
+ log_group_t* group); /* in: log group */
+/**********************************************************
+Completes a checkpoint write i/o to a log file. */
+static
+void
+log_io_complete_checkpoint(
+/*=======================*/
+ log_group_t* group); /* in: log group */
+/**********************************************************
+Completes an archiving i/o. */
+static
+void
+log_io_complete_archive(void);
+/*=========================*/
+/********************************************************************
+Tries to establish a big enough margin of free space in the log groups, such
+that a new log entry can be catenated without an immediate need for
+archiving. */
+static
+void
+log_archive_margin(void);
+/*====================*/
+
+
+/********************************************************************
+Returns the oldest modified block lsn in the pool, or log_sys->lsn if none
+exists. */
+static
+dulint
+log_buf_pool_get_oldest_modification(void)
+/*======================================*/
+{
+ dulint lsn;
+
+ ut_ad(mutex_own(&(log_sys->mutex)));
+
+ lsn = buf_pool_get_oldest_modification();
+
+ if (ut_dulint_is_zero(lsn)) {
+
+ lsn = log_sys->lsn;
+ }
+
+ return(lsn);
+}
+
+/****************************************************************
+Opens the log for log_write_low. The log must be closed with log_close and
+released with log_release. */
+
+dulint
+log_reserve_and_open(
+/*=================*/
+ /* out: start lsn of the log record */
+ ulint len) /* in: length of data to be catenated */
+{
+ log_t* log = log_sys;
+ ulint len_upper_limit;
+ ulint archived_lsn_age;
+ ulint count = 0;
+ ulint dummy;
+loop:
+ mutex_enter(&(log->mutex));
+
+ /* Calculate an upper limit for the space the string may take in the
+ log buffer */
+
+ len_upper_limit = LOG_BUF_WRITE_MARGIN + (5 * len) / 4;
+
+ if (log->buf_free + len_upper_limit > log->buf_size) {
+
+ mutex_exit(&(log->mutex));
+
+		/* Not enough free space: do a synchronous flush of the log
+		buffer */
+ log_flush_up_to(ut_dulint_max, LOG_WAIT_ALL_GROUPS);
+
+ count++;
+
+ ut_ad(count < 50);
+
+ goto loop;
+ }
+
+ if (log->archiving_state != LOG_ARCH_OFF) {
+
+ archived_lsn_age = ut_dulint_minus(log->lsn, log->archived_lsn);
+
+ if (archived_lsn_age + len_upper_limit
+ > log->max_archived_lsn_age) {
+
+ /* Not enough free archived space in log groups: do a
+ synchronous archive write batch: */
+
+ mutex_exit(&(log->mutex));
+
+ ut_ad(len_upper_limit <= log->max_archived_lsn_age);
+
+ log_archive_do(TRUE, &dummy);
+
+ count++;
+
+ ut_ad(count < 50);
+
+ goto loop;
+ }
+ }
+
+#ifdef UNIV_LOG_DEBUG
+ log->old_buf_free = log->buf_free;
+ log->old_lsn = log->lsn;
+#endif
+ return(log->lsn);
+}
+
+/****************************************************************
+Writes to the log the string given. It is assumed that the caller holds the
+log mutex. */
+
+void
+log_write_low(
+/*==========*/
+ byte* str, /* in: string */
+ ulint str_len) /* in: string length */
+{
+ log_t* log = log_sys;
+ ulint len;
+ ulint data_len;
+ byte* log_block;
+
+ ut_ad(mutex_own(&(log->mutex)));
+part_loop:
+ /* Calculate a part length */
+
+ data_len = (log->buf_free % OS_FILE_LOG_BLOCK_SIZE) + str_len;
+
+ if (data_len <= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
+
+ /* The string fits within the current log block */
+
+ len = str_len;
+ } else {
+ data_len = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
+
+ len = OS_FILE_LOG_BLOCK_SIZE
+ - (log->buf_free % OS_FILE_LOG_BLOCK_SIZE)
+ - LOG_BLOCK_TRL_SIZE;
+ }
+
+ ut_memcpy(log->buf + log->buf_free, str, len);
+
+ str_len -= len;
+ str = str + len;
+
+ log_block = ut_align_down(log->buf + log->buf_free,
+ OS_FILE_LOG_BLOCK_SIZE);
+ log_block_set_data_len(log_block, data_len);
+
+ if (data_len == OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
+ /* This block became full */
+ log_block_set_data_len(log_block, OS_FILE_LOG_BLOCK_SIZE);
+ log_block_set_checkpoint_no(log_block,
+ log_sys->next_checkpoint_no);
+ len += LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE;
+
+ log->lsn = ut_dulint_add(log->lsn, len);
+
+ /* Initialize the next block header and trailer */
+ log_block_init(log_block + OS_FILE_LOG_BLOCK_SIZE, log->lsn);
+ } else {
+ log->lsn = ut_dulint_add(log->lsn, len);
+ }
+
+ log->buf_free += len;
+
+ ut_ad(log->buf_free <= log->buf_size);
+
+ if (str_len > 0) {
+ goto part_loop;
+ }
+}
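+
+/* Illustration of the block layout the arithmetic above assumes (the
+concrete sizes come from log0log.h; typically a 512-byte block with a
+12-byte header and a 4-byte trailer):
+
+	|<--------------- OS_FILE_LOG_BLOCK_SIZE --------------->|
+	| header | log record data ...                 | trailer |
+	  ^-- LOG_BLOCK_HDR_SIZE          LOG_BLOCK_TRL_SIZE --^
+
+Since buf_free includes the header bytes of the current block, data_len
+is counted from the block start, and a block can accept payload only
+while data_len <= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE; when that
+bound is reached, the block is stamped full and the next one is
+initialized. */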
+
+/****************************************************************
+Closes the log. */
+
+dulint
+log_close(void)
+/*===========*/
+ /* out: lsn */
+{
+ byte* log_block;
+ ulint first_rec_group;
+ dulint oldest_lsn;
+ dulint lsn;
+ log_t* log = log_sys;
+
+ ut_ad(mutex_own(&(log->mutex)));
+
+ lsn = log->lsn;
+
+ log_block = ut_align_down(log->buf + log->buf_free,
+ OS_FILE_LOG_BLOCK_SIZE);
+ first_rec_group = log_block_get_first_rec_group(log_block);
+
+ if (first_rec_group == 0) {
+ /* We initialized a new log block which was not written
+ full by the current mtr: the next mtr log record group
+ will start within this block at the offset data_len */
+
+ log_block_set_first_rec_group(log_block,
+ log_block_get_data_len(log_block));
+ }
+
+ if (log->buf_free > log->max_buf_free) {
+
+ log->check_flush_or_checkpoint = TRUE;
+ }
+
+ if (ut_dulint_minus(lsn, log->last_checkpoint_lsn)
+ <= log->max_modified_age_async) {
+ goto function_exit;
+ }
+
+ oldest_lsn = buf_pool_get_oldest_modification();
+
+ if (ut_dulint_is_zero(oldest_lsn)
+ || (ut_dulint_minus(lsn, oldest_lsn)
+ > log->max_modified_age_async)
+ || (ut_dulint_minus(lsn, log->last_checkpoint_lsn)
+ > log->max_checkpoint_age_async)) {
+
+ log->check_flush_or_checkpoint = TRUE;
+ }
+function_exit:
+
+#ifdef UNIV_LOG_DEBUG
+ log_check_log_recs(log->buf + log->old_buf_free,
+ log->buf_free - log->old_buf_free, log->old_lsn);
+#endif
+
+ return(lsn);
+}
+
+/**********************************************************
+Pads the current log block full with dummy log records. Used in producing
+consistent archived log files. */
+static
+void
+log_pad_current_log_block(void)
+/*===========================*/
+{
+ byte b = MLOG_DUMMY_RECORD;
+ ulint pad_length;
+ ulint i;
+ dulint lsn;
+
+ log_reserve_and_open(OS_FILE_LOG_BLOCK_SIZE);
+
+ pad_length = OS_FILE_LOG_BLOCK_SIZE
+ - (log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE)
+ - LOG_BLOCK_TRL_SIZE;
+
+ for (i = 0; i < pad_length; i++) {
+ log_write_low(&b, 1);
+ }
+
+ lsn = log_sys->lsn;
+
+ log_close();
+ log_release();
+
+ ut_a((ut_dulint_get_low(lsn) % OS_FILE_LOG_BLOCK_SIZE)
+ == LOG_BLOCK_HDR_SIZE);
+}
+
+/**********************************************************
+Calculates the data capacity of a log group, when the log file headers are not
+included. */
+
+ulint
+log_group_get_capacity(
+/*===================*/
+ /* out: capacity in bytes */
+ log_group_t* group) /* in: log group */
+{
+ ut_ad(mutex_own(&(log_sys->mutex)));
+
+ return((group->file_size - LOG_FILE_HDR_SIZE) * group->n_files);
+}
+
+/**********************************************************
+Calculates the offset within a log group, when the log file headers are not
+included. */
+UNIV_INLINE
+ulint
+log_group_calc_size_offset(
+/*=======================*/
+ /* out: size offset (<= offset) */
+ ulint offset, /* in: real offset within the log group */
+ log_group_t* group) /* in: log group */
+{
+ ut_ad(mutex_own(&(log_sys->mutex)));
+
+ return(offset - LOG_FILE_HDR_SIZE * (1 + offset / group->file_size));
+}
+
+/**********************************************************
+Calculates the offset within a log group, when the log file headers are
+included. */
+UNIV_INLINE
+ulint
+log_group_calc_real_offset(
+/*=======================*/
+ /* out: real offset (>= offset) */
+ ulint offset, /* in: size offset within the log group */
+ log_group_t* group) /* in: log group */
+{
+ ut_ad(mutex_own(&(log_sys->mutex)));
+
+ return(offset + LOG_FILE_HDR_SIZE
+ * (1 + offset / (group->file_size - LOG_FILE_HDR_SIZE)));
+}
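+
+/* A worked example of the two mappings above, under assumed sizes
+(file_size = 1048576, LOG_FILE_HDR_SIZE = 2048; the real values come
+from the group and from log0log.h). A real offset 10240 bytes into the
+second file, i.e. 1048576 + 10240 = 1058816, lies past two file
+headers, so
+
+	size offset = 1058816 - 2048 * (1 + 1058816 / 1048576)
+		    = 1058816 - 4096 = 1054720
+
+and, mapping back,
+
+	real offset = 1054720 + 2048 * (1 + 1054720 / (1048576 - 2048))
+		    = 1054720 + 4096 = 1058816,
+
+so the two functions are inverses of each other. */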
+
+/**********************************************************
+Calculates the offset of an lsn within a log group. */
+static
+ulint
+log_group_calc_lsn_offset(
+/*======================*/
+ /* out: offset within the log group */
+ dulint lsn, /* in: lsn, must be within 4 GB of group->lsn */
+ log_group_t* group) /* in: log group */
+{
+ dulint gr_lsn;
+ ulint gr_lsn_size_offset;
+ ulint difference;
+ ulint group_size;
+ ulint offset;
+
+ ut_ad(mutex_own(&(log_sys->mutex)));
+
+ gr_lsn = group->lsn;
+
+ gr_lsn_size_offset = log_group_calc_size_offset(group->lsn_offset,
+ group);
+ group_size = log_group_get_capacity(group);
+
+ if (ut_dulint_cmp(lsn, gr_lsn) >= 0) {
+
+ difference = ut_dulint_minus(lsn, gr_lsn);
+ } else {
+ difference = ut_dulint_minus(gr_lsn, lsn);
+
+ difference = difference % group_size;
+
+ difference = group_size - difference;
+ }
+
+ offset = (gr_lsn_size_offset + difference) % group_size;
+
+ return(log_group_calc_real_offset(offset, group));
+}
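+
+/* The function above treats the group as a ring of group_size data
+bytes: it starts from the known pair (group->lsn, group->lsn_offset),
+moves forward by lsn - gr_lsn when lsn is ahead, and otherwise backward
+modulo the ring size, then maps the resulting size offset back to a
+real offset. In signed arithmetic (a sketch only; the code avoids
+signed types and the dulint wrappers) the whole branch collapses to
+
+	offset = (gr_lsn_size_offset
+		  + ((lsn - gr_lsn) % group_size + group_size))
+		 % group_size;
+*/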
+
+/************************************************************
+Sets the field values in group to correspond to a given lsn. For this function
+to work, the values must already be correctly initialized to correspond to
+some lsn, for instance, a checkpoint lsn. */
+
+void
+log_group_set_fields(
+/*=================*/
+ log_group_t* group, /* in: group */
+ dulint lsn) /* in: lsn for which the values should be
+ set */
+{
+ group->lsn_offset = log_group_calc_lsn_offset(lsn, group);
+ group->lsn = lsn;
+}
+
+/*********************************************************************
+Calculates the recommended highest values for lsn - last_checkpoint_lsn,
+lsn - buf_pool_get_oldest_modification(), and lsn - archived_lsn. */
+static
+ibool
+log_calc_max_ages(void)
+/*===================*/
+ /* out: error value FALSE if the smallest log group is
+ too small to accommodate the number of OS threads in
+ the database server */
+{
+ log_group_t* group;
+ ulint n_threads;
+ ulint margin;
+ ulint free;
+ ibool success = TRUE;
+ ulint smallest_capacity;
+ ulint archive_margin;
+ ulint smallest_archive_margin;
+
+ ut_ad(!mutex_own(&(log_sys->mutex)));
+
+ n_threads = srv_get_n_threads();
+
+ mutex_enter(&(log_sys->mutex));
+
+ group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+ ut_ad(group);
+
+ smallest_capacity = ULINT_MAX;
+ smallest_archive_margin = ULINT_MAX;
+
+ while (group) {
+ if (log_group_get_capacity(group) < smallest_capacity) {
+
+ smallest_capacity = log_group_get_capacity(group);
+ }
+
+ archive_margin = log_group_get_capacity(group)
+ - (group->file_size - LOG_FILE_HDR_SIZE)
+ - LOG_ARCHIVE_EXTRA_MARGIN;
+
+ if (archive_margin < smallest_archive_margin) {
+
+ smallest_archive_margin = archive_margin;
+ }
+
+ group = UT_LIST_GET_NEXT(log_groups, group);
+ }
+
+ /* For each OS thread we must reserve so much free space in the
+ smallest log group that it can accommodate the log entries produced
+ by single query steps: running out of free log space is a serious
+ system error which requires rebooting the database. */
+
+ free = LOG_CHECKPOINT_FREE_PER_THREAD * n_threads
+ + LOG_CHECKPOINT_EXTRA_FREE;
+	if (free >= smallest_capacity / 2) {
+		success = FALSE;
+
+		goto failure;
+	} else {
+		margin = smallest_capacity - free;
+	}
+
+ margin = ut_min(margin, log_sys->adm_checkpoint_interval);
+
+ log_sys->max_modified_age_async = margin
+ - margin / LOG_POOL_PREFLUSH_RATIO_ASYNC;
+ log_sys->max_modified_age_sync = margin
+ - margin / LOG_POOL_PREFLUSH_RATIO_SYNC;
+
+ log_sys->max_checkpoint_age_async = margin - margin
+ / LOG_POOL_CHECKPOINT_RATIO_ASYNC;
+ log_sys->max_checkpoint_age = margin;
+
+ log_sys->max_archived_lsn_age = smallest_archive_margin;
+
+ log_sys->max_archived_lsn_age_async = smallest_archive_margin
+ - smallest_archive_margin /
+ LOG_ARCHIVE_RATIO_ASYNC;
+failure:
+	mutex_exit(&(log_sys->mutex));
+
+ if (!success) {
+ printf(
+ "Error: log file group too small for the number of threads\n");
+ }
+
+ return(success);
+}
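+
+/* A worked example of the margins computed above, with assumed figures
+(UNIV_PAGE_SIZE = 16384, 10 server threads, smallest group capacity
+8 MB, adm_checkpoint_interval left at ULINT_MAX):
+
+	free   = 4 * 16 KB * 10 + 8 * 16 KB		=  768 KB
+	margin = 8 MB - 768 KB				= 7424 KB
+
+	max_modified_age_async	 = margin - margin / 8	= 6496 KB
+	max_modified_age_sync	 = margin - margin / 16	= 6960 KB
+	max_checkpoint_age_async = margin - margin / 32	= 7192 KB
+	max_checkpoint_age	 = margin		= 7424 KB
+
+so an asynchronous preflush fires first, then a synchronous preflush,
+then an asynchronous checkpoint, and a synchronous checkpoint is the
+last resort (see log_checkpoint_margin below). */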
+
+/**********************************************************
+Initializes the log. */
+
+void
+log_init(void)
+/*==========*/
+{
+ byte* buf;
+
+ log_sys = mem_alloc(sizeof(log_t));
+
+ mutex_create(&(log_sys->mutex));
+ mutex_set_level(&(log_sys->mutex), SYNC_LOG);
+
+ mutex_enter(&(log_sys->mutex));
+
+	/* Start the lsn one log block from zero: this way every
+	log record has a start lsn != zero, a fact which we will use */
+
+ log_sys->lsn = LOG_START_LSN;
+
+ ut_a(LOG_BUFFER_SIZE >= 16 * OS_FILE_LOG_BLOCK_SIZE);
+ ut_a(LOG_BUFFER_SIZE >= 4 * UNIV_PAGE_SIZE);
+
+ buf = ut_malloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE);
+ log_sys->buf = ut_align(buf, OS_FILE_LOG_BLOCK_SIZE);
+
+ log_sys->buf_size = LOG_BUFFER_SIZE;
+ log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO
+ - LOG_BUF_FLUSH_MARGIN;
+ log_sys->check_flush_or_checkpoint = TRUE;
+ UT_LIST_INIT(log_sys->log_groups);
+
+ log_sys->n_log_ios = 0;
+
+ /*----------------------------*/
+
+ log_sys->buf_next_to_write = 0;
+
+ log_sys->written_to_some_lsn = log_sys->lsn;
+ log_sys->written_to_all_lsn = log_sys->lsn;
+
+ log_sys->n_pending_writes = 0;
+
+ log_sys->no_flush_event = os_event_create(NULL);
+
+ os_event_set(log_sys->no_flush_event);
+
+ log_sys->one_flushed_event = os_event_create(NULL);
+
+ os_event_set(log_sys->one_flushed_event);
+
+ /*----------------------------*/
+ log_sys->adm_checkpoint_interval = ULINT_MAX;
+
+ log_sys->next_checkpoint_no = ut_dulint_zero;
+ log_sys->last_checkpoint_lsn = log_sys->lsn;
+ log_sys->n_pending_checkpoint_writes = 0;
+
+ rw_lock_create(&(log_sys->checkpoint_lock));
+ rw_lock_set_level(&(log_sys->checkpoint_lock), SYNC_NO_ORDER_CHECK);
+
+ log_sys->checkpoint_buf = ut_align(
+ mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE),
+ OS_FILE_LOG_BLOCK_SIZE);
+ /*----------------------------*/
+
+ log_sys->archiving_state = LOG_ARCH_ON;
+ log_sys->archived_lsn = log_sys->lsn;
+
+ log_sys->n_pending_archive_ios = 0;
+
+ rw_lock_create(&(log_sys->archive_lock));
+ rw_lock_set_level(&(log_sys->archive_lock), SYNC_NO_ORDER_CHECK);
+
+ log_sys->archive_buf = ut_align(
+ ut_malloc(LOG_ARCHIVE_BUF_SIZE
+ + OS_FILE_LOG_BLOCK_SIZE),
+ OS_FILE_LOG_BLOCK_SIZE);
+ log_sys->archive_buf_size = LOG_ARCHIVE_BUF_SIZE;
+
+ log_sys->archiving_on = os_event_create(NULL);
+
+ /*----------------------------*/
+
+ log_sys->online_backup_state = FALSE;
+
+ /*----------------------------*/
+
+ log_block_init(log_sys->buf, log_sys->lsn);
+ log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);
+
+ log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
+ log_sys->lsn = ut_dulint_add(LOG_START_LSN, LOG_BLOCK_HDR_SIZE);
+
+ mutex_exit(&(log_sys->mutex));
+
+#ifdef UNIV_LOG_DEBUG
+ recv_sys_create();
+ recv_sys_init();
+
+ recv_sys->parse_start_lsn = log_sys->lsn;
+ recv_sys->scanned_lsn = log_sys->lsn;
+ recv_sys->scanned_checkpoint_no = 0;
+ recv_sys->recovered_lsn = log_sys->lsn;
+ recv_sys->limit_lsn = ut_dulint_max;
+#endif
+}
+
+/**********************************************************************
+Adds a log group to the log system. */
+
+void
+log_group_init(
+/*===========*/
+ ulint id, /* in: group id */
+ ulint n_files, /* in: number of log files */
+ ulint file_size, /* in: log file size in bytes */
+ ulint space_id, /* in: space id of the file space
+ which contains the log files of this
+ group */
+ ulint archive_space_id) /* in: space id of the file space
+ which contains some archived log
+ files for this group; currently, only
+ for the first log group this is
+ used */
+{
+ ulint i;
+
+ log_group_t* group;
+
+ group = mem_alloc(sizeof(log_group_t));
+
+ group->id = id;
+ group->n_files = n_files;
+ group->file_size = file_size;
+ group->space_id = space_id;
+ group->state = LOG_GROUP_OK;
+ group->lsn = LOG_START_LSN;
+ group->lsn_offset = LOG_FILE_HDR_SIZE;
+ group->n_pending_writes = 0;
+
+ group->file_header_bufs = mem_alloc(sizeof(byte*) * n_files);
+ group->archive_file_header_bufs = mem_alloc(sizeof(byte*) * n_files);
+
+ for (i = 0; i < n_files; i++) {
+ *(group->file_header_bufs + i) = ut_align(
+ mem_alloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE),
+ OS_FILE_LOG_BLOCK_SIZE);
+ *(group->archive_file_header_bufs + i) = ut_align(
+ mem_alloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE),
+ OS_FILE_LOG_BLOCK_SIZE);
+ }
+
+ group->archive_space_id = archive_space_id;
+
+ group->archived_file_no = 0;
+ group->archived_offset = 0;
+
+ group->checkpoint_buf = ut_align(
+ mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE),
+ OS_FILE_LOG_BLOCK_SIZE);
+
+ UT_LIST_ADD_LAST(log_groups, log_sys->log_groups, group);
+
+ ut_a(log_calc_max_ages());
+}
+
+/**********************************************************************
+Does the unlocking needed in flush i/o completion. */
+UNIV_INLINE
+void
+log_flush_do_unlocks(
+/*=================*/
+ ulint code) /* in: any ORed combination of LOG_UNLOCK_FLUSH_LOCK
+ and LOG_UNLOCK_NONE_FLUSHED_LOCK */
+{
+ ut_ad(mutex_own(&(log_sys->mutex)));
+
+ /* NOTE that we must own the log mutex when doing the setting of the
+ events: this is because transactions will wait for these events to
+ be set, and at that moment the log flush they were waiting for must
+ have ended. If the log mutex were not reserved here, the i/o-thread
+ calling this function might be preempted for a while, and when it
+ resumed execution, it might be that a new flush had been started, and
+ this function would erroneously signal the NEW flush as completed.
+ Thus, the changes in the state of these events are performed
+ atomically in conjunction with the changes in the state of
+ log_sys->n_pending_writes etc. */
+
+ if (code & LOG_UNLOCK_NONE_FLUSHED_LOCK) {
+ os_event_set(log_sys->one_flushed_event);
+ }
+
+ if (code & LOG_UNLOCK_FLUSH_LOCK) {
+ os_event_set(log_sys->no_flush_event);
+ }
+}
+
+/**********************************************************************
+Checks if a flush is completed for a log group and does the completion
+routine if yes. */
+UNIV_INLINE
+ulint
+log_group_check_flush_completion(
+/*=============================*/
+ /* out: LOG_UNLOCK_NONE_FLUSHED_LOCK or 0 */
+ log_group_t* group) /* in: log group */
+{
+ ut_ad(mutex_own(&(log_sys->mutex)));
+
+ if (!log_sys->one_flushed && (group->n_pending_writes == 0)) {
+
+ if (log_debug_writes) {
+ printf("Log flushed first to group %lu\n", group->id);
+ }
+
+ log_sys->written_to_some_lsn = log_sys->flush_lsn;
+ log_sys->one_flushed = TRUE;
+
+ return(LOG_UNLOCK_NONE_FLUSHED_LOCK);
+ }
+
+ if (log_debug_writes && (group->n_pending_writes == 0)) {
+
+ printf("Log flushed to group %lu\n", group->id);
+ }
+
+ return(0);
+}
+
+/**********************************************************
+Checks if a flush is completed and does the completion routine if yes. */
+static
+ulint
+log_sys_check_flush_completion(void)
+/*================================*/
+ /* out: LOG_UNLOCK_FLUSH_LOCK or 0 */
+{
+ ulint move_start;
+ ulint move_end;
+
+ ut_ad(mutex_own(&(log_sys->mutex)));
+
+ if (log_sys->n_pending_writes == 0) {
+
+ log_sys->written_to_all_lsn = log_sys->flush_lsn;
+ log_sys->buf_next_to_write = log_sys->flush_end_offset;
+
+ if (log_sys->flush_end_offset > log_sys->max_buf_free / 2) {
+ /* Move the log buffer content to the start of the
+ buffer */
+
+ move_start = ut_calc_align_down(
+ log_sys->flush_end_offset,
+ OS_FILE_LOG_BLOCK_SIZE);
+ move_end = ut_calc_align(log_sys->buf_free,
+ OS_FILE_LOG_BLOCK_SIZE);
+
+ ut_memmove(log_sys->buf, log_sys->buf + move_start,
+ move_end - move_start);
+ log_sys->buf_free -= move_start;
+
+ log_sys->buf_next_to_write -= move_start;
+ }
+
+ return(LOG_UNLOCK_FLUSH_LOCK);
+ }
+
+ return(0);
+}
+
+/**********************************************************
+Completes an i/o to a log file. */
+
+void
+log_io_complete(
+/*============*/
+ log_group_t* group) /* in: log group or a dummy pointer */
+{
+ ulint unlock;
+
+ if ((byte*)group == &log_archive_io) {
+ /* It was an archive write */
+
+ log_io_complete_archive();
+
+ return;
+ }
+
+ if ((ulint)group & 0x1) {
+ /* It was a checkpoint write */
+ group = (log_group_t*)((ulint)group - 1);
+
+ fil_flush(group->space_id);
+
+ log_io_complete_checkpoint(group);
+
+ return;
+ }
+
+ fil_flush(group->space_id);
+
+ mutex_enter(&(log_sys->mutex));
+
+ ut_ad(group->n_pending_writes > 0);
+ ut_ad(log_sys->n_pending_writes > 0);
+
+ group->n_pending_writes--;
+ log_sys->n_pending_writes--;
+
+ unlock = log_group_check_flush_completion(group);
+ unlock = unlock | log_sys_check_flush_completion();
+
+ log_flush_do_unlocks(unlock);
+
+ mutex_exit(&(log_sys->mutex));
+}
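+
+/* The dispatch above relies on the message pointer itself: archive i/o
+passes the address of log_archive_io, and checkpoint writes pass the
+group address with the low bit set (see log_group_checkpoint below).
+This works because log_group_t objects are allocated at even addresses,
+so a tagged pointer can never equal a genuine group pointer:
+
+	message = (byte*)group + 1;	   (sender: mark as checkpoint)
+	if ((ulint)message & 0x1) {	   (receiver: detect the mark)
+		group = (log_group_t*)((ulint)message - 1);
+	}
+*/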
+
+/**********************************************************
+Writes a log file header to a log file space. */
+static
+void
+log_group_file_header_flush(
+/*========================*/
+ ulint type, /* in: LOG_FLUSH or LOG_RECOVER */
+ log_group_t* group, /* in: log group */
+ ulint nth_file, /* in: header to the nth file in the
+ log file space */
+ dulint start_lsn) /* in: log file data starts at this
+ lsn */
+{
+ byte* buf;
+ ulint dest_offset;
+ ibool sync;
+
+ ut_ad(mutex_own(&(log_sys->mutex)));
+
+ ut_a(nth_file < group->n_files);
+
+ buf = *(group->file_header_bufs + nth_file);
+
+ mach_write_to_4(buf + LOG_GROUP_ID, group->id);
+ mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn);
+
+ dest_offset = nth_file * group->file_size;
+
+ sync = FALSE;
+
+ if (type == LOG_RECOVER) {
+
+ sync = TRUE;
+ }
+
+ if (log_debug_writes) {
+ printf(
+ "Writing log file header to group %lu file %lu\n", group->id,
+ nth_file);
+ }
+
+ if (log_do_write) {
+ if (type == LOG_FLUSH) {
+ log_sys->n_pending_writes++;
+ group->n_pending_writes++;
+ }
+
+ log_sys->n_log_ios++;
+
+ fil_io(OS_FILE_WRITE | OS_FILE_LOG, sync, group->space_id,
+ dest_offset / UNIV_PAGE_SIZE,
+ dest_offset % UNIV_PAGE_SIZE,
+ OS_FILE_LOG_BLOCK_SIZE,
+ buf, group);
+ }
+}
+
+/**********************************************************
+Writes a buffer to a log file group. */
+
+void
+log_group_write_buf(
+/*================*/
+ ulint type, /* in: LOG_FLUSH or LOG_RECOVER */
+ log_group_t* group, /* in: log group */
+ byte* buf, /* in: buffer */
+ ulint len, /* in: buffer len; must be divisible
+ by OS_FILE_LOG_BLOCK_SIZE */
+ dulint start_lsn, /* in: start lsn of the buffer; must
+ be divisible by
+ OS_FILE_LOG_BLOCK_SIZE */
+ ulint new_data_offset)/* in: start offset of new data in
+ buf: this parameter is used to decide
+ if we have to write a new log file
+ header */
+{
+ ulint write_len;
+ ibool sync;
+ ibool write_header;
+ ulint next_offset;
+
+ ut_ad(mutex_own(&(log_sys->mutex)));
+ ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
+ ut_ad(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
+
+ sync = FALSE;
+
+ if (type == LOG_RECOVER) {
+
+ sync = TRUE;
+ }
+
+ if (new_data_offset == 0) {
+ write_header = TRUE;
+ } else {
+ write_header = FALSE;
+ }
+loop:
+ if (len == 0) {
+
+ return;
+ }
+
+ next_offset = log_group_calc_lsn_offset(start_lsn, group);
+
+ if ((next_offset % group->file_size == LOG_FILE_HDR_SIZE)
+ && write_header) {
+ /* We start to write a new log file instance in the group */
+
+ log_group_file_header_flush(type, group,
+ next_offset / group->file_size, start_lsn);
+ }
+
+ if ((next_offset % group->file_size) + len > group->file_size) {
+
+ write_len = group->file_size - (next_offset % group->file_size);
+ } else {
+ write_len = len;
+ }
+
+ if (log_debug_writes) {
+ printf(
+ "Writing log file segment to group %lu offset %lu len %lu\n",
+ group->id, next_offset, write_len);
+ }
+
+ if (log_do_write) {
+ if (type == LOG_FLUSH) {
+ log_sys->n_pending_writes++;
+ group->n_pending_writes++;
+ }
+
+ log_sys->n_log_ios++;
+
+ fil_io(OS_FILE_WRITE | OS_FILE_LOG, sync, group->space_id,
+ next_offset / UNIV_PAGE_SIZE,
+ next_offset % UNIV_PAGE_SIZE, write_len, buf, group);
+ }
+
+ if (write_len < len) {
+ start_lsn = ut_dulint_add(start_lsn, write_len);
+ len -= write_len;
+ buf += write_len;
+
+ write_header = TRUE;
+
+ goto loop;
+ }
+}
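+
+/* Sketch of how the loop above splits a write that spans a file
+boundary, with assumed numbers (file_size = 1 MB, a 96 KB buffer whose
+start lsn maps 32 KB before the end of file n): the first iteration
+writes those 32 KB to the tail of file n; write_header is then set, so
+the second iteration first flushes a header for file n + 1 (the new
+next_offset lands exactly at LOG_FILE_HDR_SIZE within it) and writes
+the remaining 64 KB there. */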
+
+/**********************************************************
+This function is called, e.g., when a transaction wants to commit. It checks
+that the log has been flushed to disk up to the last log entry written by the
+transaction. If there is a flush running, it waits and checks if the flush
+flushed enough. If not, starts a new flush. */
+
+void
+log_flush_up_to(
+/*============*/
+ dulint lsn, /* in: log sequence number up to which the log should
+ be flushed, ut_dulint_max if not specified */
+ ulint wait) /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP,
+ or LOG_WAIT_ALL_GROUPS */
+{
+ log_group_t* group;
+ ulint start_offset;
+ ulint end_offset;
+ ulint area_start;
+ ulint area_end;
+ ulint loop_count;
+
+ if (recv_no_ibuf_operations) {
+ /* Recovery is running and no operations on the log files are
+ allowed yet (the variable name .._no_ibuf_.. is misleading) */
+
+ return;
+ }
+
+ loop_count = 0;
+loop:
+ loop_count++;
+
+ ut_ad(loop_count < 5);
+
+ if (loop_count > 2) {
+/* printf("Log loop count %lu\n", loop_count); */
+ }
+
+ mutex_enter(&(log_sys->mutex));
+
+ if ((ut_dulint_cmp(log_sys->written_to_all_lsn, lsn) >= 0)
+ || ((ut_dulint_cmp(log_sys->written_to_some_lsn, lsn) >= 0)
+ && (wait != LOG_WAIT_ALL_GROUPS))) {
+
+ mutex_exit(&(log_sys->mutex));
+
+ return;
+ }
+
+ if (log_sys->n_pending_writes > 0) {
+ /* A flush is running */
+
+ if (ut_dulint_cmp(log_sys->flush_lsn, lsn) >= 0) {
+ /* The flush will flush enough: wait for it to
+ complete */
+
+ goto do_waits;
+ }
+
+ mutex_exit(&(log_sys->mutex));
+
+ /* Wait for the flush to complete and try to start a new
+ flush */
+
+ os_event_wait(log_sys->no_flush_event);
+
+ goto loop;
+ }
+
+ if (log_sys->buf_free == log_sys->buf_next_to_write) {
+ /* Nothing to flush */
+
+ mutex_exit(&(log_sys->mutex));
+
+ return;
+ }
+
+ if (log_debug_writes) {
+ printf("Flushing log from %lu %lu up to lsn %lu %lu\n",
+ ut_dulint_get_high(log_sys->written_to_all_lsn),
+ ut_dulint_get_low(log_sys->written_to_all_lsn),
+ ut_dulint_get_high(log_sys->lsn),
+ ut_dulint_get_low(log_sys->lsn));
+ }
+
+ os_event_reset(log_sys->no_flush_event);
+ os_event_reset(log_sys->one_flushed_event);
+
+ start_offset = log_sys->buf_next_to_write;
+ end_offset = log_sys->buf_free;
+
+ area_start = ut_calc_align_down(start_offset, OS_FILE_LOG_BLOCK_SIZE);
+ area_end = ut_calc_align(end_offset, OS_FILE_LOG_BLOCK_SIZE);
+
+ ut_ad(area_end - area_start > 0);
+
+ log_sys->flush_lsn = log_sys->lsn;
+ log_sys->one_flushed = FALSE;
+
+ log_block_set_flush_bit(log_sys->buf + area_start, TRUE);
+ log_block_set_checkpoint_no(
+ log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
+ log_sys->next_checkpoint_no);
+
+ /* Copy the last, incompletely written, log block a log block length
+ up, so that when the flush operation writes from the log buffer, the
+ segment to write will not be changed by writers to the log */
+
+ ut_memcpy(log_sys->buf + area_end,
+ log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
+ OS_FILE_LOG_BLOCK_SIZE);
+
+ log_sys->buf_free += OS_FILE_LOG_BLOCK_SIZE;
+ log_sys->flush_end_offset = log_sys->buf_free;
+
+ group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+ while (group) {
+ log_group_write_buf(LOG_FLUSH, group,
+ log_sys->buf + area_start,
+ area_end - area_start,
+ ut_dulint_align_down(log_sys->written_to_all_lsn,
+ OS_FILE_LOG_BLOCK_SIZE),
+ start_offset - area_start);
+
+ log_group_set_fields(group, log_sys->flush_lsn);
+
+ group = UT_LIST_GET_NEXT(log_groups, group);
+ }
+
+do_waits:
+ mutex_exit(&(log_sys->mutex));
+
+ if (wait == LOG_WAIT_ONE_GROUP) {
+ os_event_wait(log_sys->one_flushed_event);
+ } else if (wait == LOG_WAIT_ALL_GROUPS) {
+ os_event_wait(log_sys->no_flush_event);
+ } else {
+ ut_ad(wait == LOG_NO_WAIT);
+ }
+}
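+
+/* Invariant of the lsn bookkeeping used above, whenever the log mutex
+is held:
+
+	written_to_all_lsn <= written_to_some_lsn <= flush_lsn <= lsn
+
+A LOG_WAIT_ONE_GROUP waiter is released by one_flushed_event as soon as
+the first group completes and written_to_some_lsn advances; a
+LOG_WAIT_ALL_GROUPS waiter is released by no_flush_event only when
+every group has completed and written_to_all_lsn advances. */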
+
+/********************************************************************
+Tries to establish a big enough margin of free space in the log buffer, such
+that a new log entry can be catenated without an immediate need for a flush. */
+static
+void
+log_flush_margin(void)
+/*==================*/
+{
+ ibool do_flush = FALSE;
+ log_t* log = log_sys;
+
+ mutex_enter(&(log->mutex));
+
+ if (log->buf_free > log->max_buf_free) {
+
+ if (log->n_pending_writes > 0) {
+ /* A flush is running: hope that it will provide enough
+ free space */
+ } else {
+ do_flush = TRUE;
+ }
+ }
+
+ mutex_exit(&(log->mutex));
+
+ if (do_flush) {
+ log_flush_up_to(ut_dulint_max, LOG_NO_WAIT);
+ }
+}
+
+/********************************************************************
+Advances the smallest lsn for which there are unflushed dirty blocks in the
+buffer pool. NOTE: this function may only be called if the calling thread owns
+no synchronization objects! */
+
+ibool
+log_preflush_pool_modified_pages(
+/*=============================*/
+ /* out: FALSE if there was a flush batch of
+ the same type running, which means that we
+ could not start this flush batch */
+ dulint new_oldest, /* in: try to advance oldest_modified_lsn
+ at least to this lsn */
+ ibool sync) /* in: TRUE if synchronous operation is
+ desired */
+{
+ ulint n_pages;
+
+ if (recv_recovery_on) {
+ /* If the recovery is running, we must first apply all
+ log records to their respective file pages to get the
+ right modify lsn values to these pages: otherwise, there
+ might be pages on disk which are not yet recovered to the
+ current lsn, and even after calling this function, we could
+ not know how up-to-date the disk version of the database is,
+ and we could not make a new checkpoint on the basis of the
+ info on the buffer pool only. */
+
+ recv_apply_hashed_log_recs(TRUE);
+ }
+
+ n_pages = buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX, new_oldest);
+
+ if (sync) {
+ buf_flush_wait_batch_end(BUF_FLUSH_LIST);
+ }
+
+ if (n_pages == ULINT_UNDEFINED) {
+
+ return(FALSE);
+ }
+
+ return(TRUE);
+}
+
+/**********************************************************
+Completes a checkpoint. */
+static
+void
+log_complete_checkpoint(void)
+/*=========================*/
+{
+ ut_ad(mutex_own(&(log_sys->mutex)));
+ ut_ad(log_sys->n_pending_checkpoint_writes == 0);
+
+ log_sys->next_checkpoint_no
+ = ut_dulint_add(log_sys->next_checkpoint_no, 1);
+
+ log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn;
+
+ rw_lock_x_unlock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT);
+}
+
+/**********************************************************
+Completes an asynchronous checkpoint info write i/o to a log file. */
+static
+void
+log_io_complete_checkpoint(
+/*=======================*/
+ log_group_t* group) /* in: log group */
+{
+ mutex_enter(&(log_sys->mutex));
+
+ ut_ad(log_sys->n_pending_checkpoint_writes > 0);
+
+ log_sys->n_pending_checkpoint_writes--;
+
+ if (log_debug_writes) {
+ printf("Checkpoint info written to group %lu\n", group->id);
+ }
+
+ if (log_sys->n_pending_checkpoint_writes == 0) {
+ log_complete_checkpoint();
+ }
+
+ mutex_exit(&(log_sys->mutex));
+}
+
+/***********************************************************************
+Writes info to a checkpoint about a log group. */
+static
+void
+log_checkpoint_set_nth_group_info(
+/*==============================*/
+ byte* buf, /* in: buffer for checkpoint info */
+ ulint n, /* in: nth slot */
+ ulint file_no,/* in: archived file number */
+ ulint offset) /* in: archived file offset */
+{
+ ut_ad(n < LOG_MAX_N_GROUPS);
+
+ mach_write_to_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
+ + 8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO, file_no);
+ mach_write_to_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
+ + 8 * n + LOG_CHECKPOINT_ARCHIVED_OFFSET, offset);
+}
+
+/***********************************************************************
+Gets info from a checkpoint about a log group. */
+
+void
+log_checkpoint_get_nth_group_info(
+/*==============================*/
+ byte* buf, /* in: buffer containing checkpoint info */
+ ulint n, /* in: nth slot */
+ ulint* file_no,/* out: archived file number */
+ ulint* offset) /* out: archived file offset */
+{
+ ut_ad(n < LOG_MAX_N_GROUPS);
+
+ *file_no = mach_read_from_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
+ + 8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO);
+ *offset = mach_read_from_4(buf + LOG_CHECKPOINT_GROUP_ARRAY
+ + 8 * n + LOG_CHECKPOINT_ARCHIVED_OFFSET);
+}
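+
+/* Layout of the checkpoint group array which the two accessors above
+assume: 8 bytes per slot, one slot per possible group id,
+
+	buf + LOG_CHECKPOINT_GROUP_ARRAY + 8 * n
+	   + LOG_CHECKPOINT_ARCHIVED_FILE_NO	4-byte archived file no
+	   + LOG_CHECKPOINT_ARCHIVED_OFFSET	4-byte archived offset
+
+(the two field offsets within a slot are defined in log0log.h,
+presumably as 0 and 4). */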
+
+/**********************************************************
+Writes the checkpoint info to a log group header. */
+static
+void
+log_group_checkpoint(
+/*=================*/
+ log_group_t* group) /* in: log group */
+{
+ log_group_t* group2;
+ dulint archived_lsn;
+ dulint next_archived_lsn;
+ ulint write_offset;
+ ulint fold;
+ byte* buf;
+ ulint i;
+
+ ut_ad(mutex_own(&(log_sys->mutex)));
+ ut_ad(LOG_CHECKPOINT_SIZE <= OS_FILE_LOG_BLOCK_SIZE);
+
+ buf = group->checkpoint_buf;
+
+ mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no);
+ mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys->next_checkpoint_lsn);
+
+ mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET,
+ log_group_calc_lsn_offset(
+ log_sys->next_checkpoint_lsn, group));
+
+ mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, log_sys->buf_size);
+
+ if (log_sys->archiving_state == LOG_ARCH_OFF) {
+ archived_lsn = ut_dulint_max;
+ } else {
+ archived_lsn = log_sys->archived_lsn;
+
+ if (0 != ut_dulint_cmp(archived_lsn,
+ log_sys->next_archived_lsn)) {
+ next_archived_lsn = log_sys->next_archived_lsn;
+ /* For debugging only */
+ }
+ }
+
+ mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, archived_lsn);
+
+ for (i = 0; i < LOG_MAX_N_GROUPS; i++) {
+ log_checkpoint_set_nth_group_info(buf, i, 0, 0);
+ }
+
+ group2 = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+ while (group2) {
+ log_checkpoint_set_nth_group_info(buf, group2->id,
+ group2->archived_file_no,
+ group2->archived_offset);
+
+ group2 = UT_LIST_GET_NEXT(log_groups, group2);
+ }
+
+ fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
+ mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold);
+
+ fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
+ LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
+ mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold);
+
+	/* We alternate the physical place of the checkpoint info in the first
+	log file: a crash during a checkpoint write can then spoil at most one
+	of the two copies, and recovery can fall back to the other */
+
+ if (ut_dulint_get_low(log_sys->next_checkpoint_no) % 2 == 0) {
+ write_offset = LOG_CHECKPOINT_1;
+ } else {
+ write_offset = LOG_CHECKPOINT_2;
+ }
+
+ if (log_do_write) {
+ if (log_sys->n_pending_checkpoint_writes == 0) {
+
+ rw_lock_x_lock_gen(&(log_sys->checkpoint_lock),
+ LOG_CHECKPOINT);
+ }
+
+ log_sys->n_pending_checkpoint_writes++;
+
+ log_sys->n_log_ios++;
+
+		/* We send as the last parameter the group machine address
+		incremented by 1, as we want to distinguish between a normal
+		log file write and a checkpoint field write */
+
+ fil_io(OS_FILE_WRITE | OS_FILE_LOG, FALSE, group->space_id,
+ write_offset / UNIV_PAGE_SIZE,
+ write_offset % UNIV_PAGE_SIZE,
+ OS_FILE_LOG_BLOCK_SIZE,
+ buf, ((byte*)group + 1));
+
+ ut_ad(((ulint)group & 0x1) == 0);
+ }
+}
+
+/**********************************************************
+Reads a checkpoint info from a log group header to log_sys->checkpoint_buf. */
+
+void
+log_group_read_checkpoint_info(
+/*===========================*/
+ log_group_t* group, /* in: log group */
+ ulint field) /* in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
+{
+ ut_ad(mutex_own(&(log_sys->mutex)));
+
+ log_sys->n_log_ios++;
+
+ fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->space_id,
+ field / UNIV_PAGE_SIZE, field % UNIV_PAGE_SIZE,
+ OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL);
+}
+
+/**********************************************************
+Writes checkpoint info to groups. */
+
+void
+log_groups_write_checkpoint_info(void)
+/*==================================*/
+{
+ log_group_t* group;
+
+ ut_ad(mutex_own(&(log_sys->mutex)));
+
+ group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+ while (group) {
+ log_group_checkpoint(group);
+
+ group = UT_LIST_GET_NEXT(log_groups, group);
+ }
+}
+
+/**********************************************************
+Makes a checkpoint. Note that this function does not flush dirty
+blocks from the buffer pool: it only checks what is the lsn of the oldest
+modification in the pool, and writes information about that lsn to the
+log files. Use log_make_checkpoint_at to also flush the pool. */
+
+ibool
+log_checkpoint(
+/*===========*/
+ /* out: TRUE if success, FALSE if a checkpoint
+ write was already running */
+ ibool sync, /* in: TRUE if synchronous operation is
+ desired */
+	ibool	write_always)	/* in: the function normally checks if the
+					new checkpoint would have a greater
+ lsn than the previous one: if not, then no
+ physical write is done; by setting this
+ parameter TRUE, a physical write will always be
+ made to log files */
+{
+ dulint oldest_lsn;
+
+ if (recv_recovery_is_on()) {
+ recv_apply_hashed_log_recs(TRUE);
+ }
+
+ fil_flush_file_spaces(FIL_TABLESPACE);
+
+ mutex_enter(&(log_sys->mutex));
+
+ oldest_lsn = log_buf_pool_get_oldest_modification();
+
+ mutex_exit(&(log_sys->mutex));
+
+	/* Because the log also contains headers and dummy log records,
+ if the buffer pool contains no dirty buffers, oldest_lsn
+ gets the value log_sys->lsn from the previous function,
+ and we must make sure that the log is flushed up to that
+ lsn. If there are dirty buffers in the buffer pool, then our
+ write-ahead-logging algorithm ensures that the log has been flushed
+ up to oldest_lsn. */
+
+ log_flush_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS);
+
+ mutex_enter(&(log_sys->mutex));
+
+ if (!write_always && ut_dulint_cmp(
+ log_sys->last_checkpoint_lsn, oldest_lsn) >= 0) {
+
+ mutex_exit(&(log_sys->mutex));
+
+ return(TRUE);
+ }
+
+ ut_ad(ut_dulint_cmp(log_sys->written_to_all_lsn, oldest_lsn) >= 0);
+
+ if (log_sys->n_pending_checkpoint_writes > 0) {
+ /* A checkpoint write is running */
+
+ mutex_exit(&(log_sys->mutex));
+
+ if (sync) {
+ /* Wait for the checkpoint write to complete */
+ rw_lock_s_lock(&(log_sys->checkpoint_lock));
+ rw_lock_s_unlock(&(log_sys->checkpoint_lock));
+ }
+
+ return(FALSE);
+ }
+
+ log_sys->next_checkpoint_lsn = oldest_lsn;
+
+ if (log_debug_writes) {
+ printf("Making checkpoint no %lu at lsn %lu %lu\n",
+ ut_dulint_get_low(log_sys->next_checkpoint_no),
+ ut_dulint_get_high(oldest_lsn),
+ ut_dulint_get_low(oldest_lsn));
+ }
+
+ log_groups_write_checkpoint_info();
+
+ mutex_exit(&(log_sys->mutex));
+
+ if (sync) {
+ /* Wait for the checkpoint write to complete */
+ rw_lock_s_lock(&(log_sys->checkpoint_lock));
+ rw_lock_s_unlock(&(log_sys->checkpoint_lock));
+ }
+
+ return(TRUE);
+}
+
+/********************************************************************
+Makes a checkpoint at a given lsn or later. */
+
+void
+log_make_checkpoint_at(
+/*===================*/
+ dulint lsn, /* in: make a checkpoint at this or a later
+ lsn, if ut_dulint_max, makes a checkpoint at
+ the latest lsn */
+	ibool	write_always)	/* in: the function normally checks if the
+					new checkpoint would have a greater
+ lsn than the previous one: if not, then no
+ physical write is done; by setting this
+ parameter TRUE, a physical write will always be
+ made to log files */
+{
+ ibool success;
+
+ /* Preflush pages synchronously */
+
+ success = FALSE;
+
+ while (!success) {
+ success = log_preflush_pool_modified_pages(lsn, TRUE);
+ }
+
+ success = FALSE;
+
+ while (!success) {
+ success = log_checkpoint(TRUE, write_always);
+ }
+}
+
+/********************************************************************
+Tries to establish a big enough margin of free space in the log groups, such
+that a new log entry can be catenated without an immediate need for a
+checkpoint. NOTE: this function may only be called if the calling thread
+owns no synchronization objects! */
+static
+void
+log_checkpoint_margin(void)
+/*=======================*/
+{
+ log_t* log = log_sys;
+ ulint age;
+ ulint checkpoint_age;
+ ulint advance;
+ dulint oldest_lsn;
+ dulint new_oldest;
+ ibool do_preflush;
+ ibool sync;
+ ibool checkpoint_sync;
+ ibool do_checkpoint;
+ ibool success;
+loop:
+ sync = FALSE;
+ checkpoint_sync = FALSE;
+ do_preflush = FALSE;
+ do_checkpoint = FALSE;
+
+ mutex_enter(&(log->mutex));
+
+ if (log->check_flush_or_checkpoint == FALSE) {
+ mutex_exit(&(log->mutex));
+
+ return;
+ }
+
+ oldest_lsn = log_buf_pool_get_oldest_modification();
+
+ age = ut_dulint_minus(log->lsn, oldest_lsn);
+
+ if (age > log->max_modified_age_sync) {
+
+ /* A flush is urgent: we have to do a synchronous preflush */
+
+ sync = TRUE;
+
+ advance = 2 * (age - log->max_modified_age_sync);
+
+ new_oldest = ut_dulint_add(oldest_lsn, advance);
+
+ do_preflush = TRUE;
+
+ } else if (age > log->max_modified_age_async) {
+
+ /* A flush is not urgent: we do an asynchronous preflush */
+ advance = age - log->max_modified_age_async;
+
+ new_oldest = ut_dulint_add(oldest_lsn, advance);
+
+ do_preflush = TRUE;
+ }
+
+ checkpoint_age = ut_dulint_minus(log->lsn, log->last_checkpoint_lsn);
+
+ if (checkpoint_age > log->max_checkpoint_age) {
+ /* A checkpoint is urgent: we do it synchronously */
+
+ checkpoint_sync = TRUE;
+
+ do_checkpoint = TRUE;
+
+ } else if (checkpoint_age > log->max_checkpoint_age_async) {
+ /* A checkpoint is not urgent: do it asynchronously */
+
+ do_checkpoint = TRUE;
+
+ log->check_flush_or_checkpoint = FALSE;
+ } else {
+ log->check_flush_or_checkpoint = FALSE;
+ }
+
+ mutex_exit(&(log->mutex));
+
+ if (do_preflush) {
+ success = log_preflush_pool_modified_pages(new_oldest, sync);
+
+ /* If the flush succeeded, this thread has done its part
+ and can proceed. If it did not succeed, there was another
+ thread doing a flush at the same time. If sync was FALSE,
+ the flush was not urgent, and we let this thread proceed.
+ Otherwise, we let it start from the beginning again. */
+
+ if (sync && !success) {
+ mutex_enter(&(log->mutex));
+
+ log->check_flush_or_checkpoint = TRUE;
+
+ mutex_exit(&(log->mutex));
+ goto loop;
+ }
+ }
+
+ if (do_checkpoint) {
+ log_checkpoint(checkpoint_sync, FALSE);
+
+ if (checkpoint_sync) {
+
+ goto loop;
+ }
+ }
+}
+
+/**********************************************************
+Reads a specified log segment to a buffer. */
+
+void
+log_group_read_log_seg(
+/*===================*/
+ ulint type, /* in: LOG_ARCHIVE or LOG_RECOVER */
+ byte* buf, /* in: buffer where to read */
+ log_group_t* group, /* in: log group */
+ dulint start_lsn, /* in: read area start */
+ dulint end_lsn) /* in: read area end */
+{
+ ulint len;
+ ulint source_offset;
+ ibool sync;
+
+ ut_ad(mutex_own(&(log_sys->mutex)));
+
+ sync = FALSE;
+
+ if (type == LOG_RECOVER) {
+ sync = TRUE;
+ }
+loop:
+ source_offset = log_group_calc_lsn_offset(start_lsn, group);
+
+ len = ut_dulint_minus(end_lsn, start_lsn);
+
+ ut_ad(len != 0);
+
+ if ((source_offset % group->file_size) + len > group->file_size) {
+
+ len = group->file_size - (source_offset % group->file_size);
+ }
+
+ if (type == LOG_ARCHIVE) {
+
+ log_sys->n_pending_archive_ios++;
+ }
+
+ log_sys->n_log_ios++;
+
+ fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id,
+ source_offset / UNIV_PAGE_SIZE, source_offset % UNIV_PAGE_SIZE,
+ len, buf, &log_archive_io);
+
+ start_lsn = ut_dulint_add(start_lsn, len);
+ buf += len;
+
+ if (ut_dulint_cmp(start_lsn, end_lsn) != 0) {
+
+ goto loop;
+ }
+}
+
+/**********************************************************
+Generates an archived log file name. */
+
+void
+log_archived_file_name_gen(
+/*=======================*/
+ char* buf, /* in: buffer where to write */
+ ulint id, /* in: group id */
+ ulint file_no)/* in: file number */
+{
+ UT_NOT_USED(id); /* Currently we only archive the first group */
+
+ sprintf(buf, "%sib_arch_log_%010lu", srv_arch_dir, file_no);
+}
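+
+/* Example of a generated name, assuming srv_arch_dir is
+"/usr/local/mysql/data/" (the %010lu format zero-pads the file number
+to ten digits):
+
+	log_archived_file_name_gen(buf, 0, 123);
+		-> "/usr/local/mysql/data/ib_arch_log_0000000123"
+*/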
+
+/**********************************************************
+Writes a log file header to an archive log file space. */
+static
+void
+log_group_archive_file_header_write(
+/*================================*/
+ log_group_t* group, /* in: log group */
+ ulint nth_file, /* in: header to the nth file in the
+ archive log file space */
+ ulint file_no, /* in: archived file number */
+ dulint start_lsn) /* in: log file data starts at this
+ lsn */
+{
+ byte* buf;
+ ulint dest_offset;
+
+ ut_ad(mutex_own(&(log_sys->mutex)));
+
+ ut_a(nth_file < group->n_files);
+
+ buf = *(group->archive_file_header_bufs + nth_file);
+
+ mach_write_to_4(buf + LOG_GROUP_ID, group->id);
+ mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn);
+ mach_write_to_4(buf + LOG_FILE_NO, file_no);
+
+ mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, FALSE);
+
+ dest_offset = nth_file * group->file_size;
+
+ log_sys->n_log_ios++;
+
+ fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->archive_space_id,
+ dest_offset / UNIV_PAGE_SIZE,
+ dest_offset % UNIV_PAGE_SIZE,
+ 2 * OS_FILE_LOG_BLOCK_SIZE,
+ buf, &log_archive_io);
+}
+
+/**********************************************************
+Writes a log file header to a completed archived log file. */
+static
+void
+log_group_archive_completed_header_write(
+/*=====================================*/
+ log_group_t* group, /* in: log group */
+ ulint nth_file, /* in: header to the nth file in the
+ archive log file space */
+ dulint end_lsn) /* in: end lsn of the file */
+{
+ byte* buf;
+ ulint dest_offset;
+
+ ut_ad(mutex_own(&(log_sys->mutex)));
+ ut_a(nth_file < group->n_files);
+
+ buf = *(group->archive_file_header_bufs + nth_file);
+
+ mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, TRUE);
+ mach_write_to_8(buf + LOG_FILE_END_LSN, end_lsn);
+
+ dest_offset = nth_file * group->file_size + LOG_FILE_ARCH_COMPLETED;
+
+ log_sys->n_log_ios++;
+
+ fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->archive_space_id,
+ dest_offset / UNIV_PAGE_SIZE,
+ dest_offset % UNIV_PAGE_SIZE,
+ OS_FILE_LOG_BLOCK_SIZE,
+ buf + LOG_FILE_ARCH_COMPLETED,
+ &log_archive_io);
+}
+
+/**********************************************************
+Does the archive writes for a single log group. */
+static
+void
+log_group_archive(
+/*==============*/
+ log_group_t* group) /* in: log group */
+{
+ os_file_t file_handle;
+ dulint start_lsn;
+ dulint end_lsn;
+ char name[100];
+ byte* buf;
+ ulint len;
+ ibool ret;
+ ulint next_offset;
+ ulint n_files;
+ ulint open_mode;
+
+ ut_ad(mutex_own(&(log_sys->mutex)));
+
+ start_lsn = log_sys->archived_lsn;
+
+ ut_ad(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
+
+ end_lsn = log_sys->next_archived_lsn;
+
+ ut_ad(ut_dulint_get_low(end_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
+
+ buf = log_sys->archive_buf;
+
+ n_files = 0;
+
+ next_offset = group->archived_offset;
+loop:
+ if ((next_offset % group->file_size == 0)
+ || (fil_space_get_size(group->archive_space_id) == 0)) {
+
+ /* Add the file to the archive file space; create or open the
+ file */
+
+ if (next_offset % group->file_size == 0) {
+ open_mode = OS_FILE_CREATE;
+ } else {
+ open_mode = OS_FILE_OPEN;
+ }
+
+ log_archived_file_name_gen(name, group->id,
+ group->archived_file_no + n_files);
+ fil_reserve_right_to_open();
+
+ file_handle = os_file_create(name, open_mode, OS_FILE_AIO,
+ &ret);
+ if (!ret && (open_mode == OS_FILE_CREATE)) {
+ file_handle = os_file_create(name, OS_FILE_OPEN,
+ OS_FILE_AIO, &ret);
+ }
+
+ ut_a(ret);
+
+ if (log_debug_writes) {
+ printf("Created archive file %s\n", name);
+ }
+
+ ret = os_file_close(file_handle);
+
+ ut_a(ret);
+
+ fil_release_right_to_open();
+
+ /* Add the archive file as a node to the space */
+
+ fil_node_create(name, group->file_size / UNIV_PAGE_SIZE,
+ group->archive_space_id);
+
+ if (next_offset % group->file_size == 0) {
+ log_group_archive_file_header_write(group, n_files,
+ group->archived_file_no + n_files,
+ start_lsn);
+
+ next_offset += LOG_FILE_HDR_SIZE;
+ }
+ }
+
+ len = ut_dulint_minus(end_lsn, start_lsn);
+
+ if (group->file_size < (next_offset % group->file_size) + len) {
+
+ len = group->file_size - (next_offset % group->file_size);
+ }
+
+ if (log_debug_writes) {
+ printf(
+ "Archiving starting at lsn %lu %lu, len %lu to group %lu\n",
+ ut_dulint_get_high(start_lsn),
+ ut_dulint_get_low(start_lsn),
+ len, group->id);
+ }
+
+ log_sys->n_pending_archive_ios++;
+
+ log_sys->n_log_ios++;
+
+ fil_io(OS_FILE_WRITE | OS_FILE_LOG, FALSE, group->archive_space_id,
+ next_offset / UNIV_PAGE_SIZE, next_offset % UNIV_PAGE_SIZE,
+ ut_calc_align(len, OS_FILE_LOG_BLOCK_SIZE), buf,
+ &log_archive_io);
+
+ start_lsn = ut_dulint_add(start_lsn, len);
+ next_offset += len;
+ buf += len;
+
+ if (next_offset % group->file_size == 0) {
+ n_files++;
+ }
+
+ if (ut_dulint_cmp(end_lsn, start_lsn) != 0) {
+
+ goto loop;
+ }
+
+ group->next_archived_file_no = group->archived_file_no + n_files;
+ group->next_archived_offset = next_offset % group->file_size;
+
+ ut_ad(group->next_archived_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
+}
+
+/*********************************************************
+Writes to the archive of each log group; currently, only the first
+group is archived. */
+static
+void
+log_archive_groups(void)
+/*====================*/
+{
+ log_group_t* group;
+
+ ut_ad(mutex_own(&(log_sys->mutex)));
+
+ group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+ log_group_archive(group);
+}
+
+/*********************************************************
+Completes the archiving write phase for each log group; currently, only
+the first log group is used. */
+static
+void
+log_archive_write_complete_groups(void)
+/*===================================*/
+{
+ log_group_t* group;
+ ulint end_offset;
+ ulint trunc_files;
+ ulint n_files;
+ dulint start_lsn;
+ dulint end_lsn;
+ ulint i;
+
+ ut_ad(mutex_own(&(log_sys->mutex)));
+
+ group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+ group->archived_file_no = group->next_archived_file_no;
+ group->archived_offset = group->next_archived_offset;
+
+ /* Truncate from the archive file space all but the last
+ file, or if it has been written full, all files */
+
+ n_files = (UNIV_PAGE_SIZE
+ * fil_space_get_size(group->archive_space_id))
+ / group->file_size;
+ ut_ad(n_files > 0);
+
+ end_offset = group->archived_offset;
+
+ if (end_offset % group->file_size == 0) {
+
+ trunc_files = n_files;
+ } else {
+ trunc_files = n_files - 1;
+ }
+
+ if (log_debug_writes && trunc_files) {
+ printf("Complete file(s) archived to group %lu\n",
+ group->id);
+ }
+
+ /* Calculate the archive file space start lsn */
+ start_lsn = ut_dulint_subtract(log_sys->next_archived_lsn,
+ end_offset - LOG_FILE_HDR_SIZE
+ + trunc_files
+ * (group->file_size - LOG_FILE_HDR_SIZE));
+ end_lsn = start_lsn;
+
+ for (i = 0; i < trunc_files; i++) {
+
+ end_lsn = ut_dulint_add(end_lsn,
+ group->file_size - LOG_FILE_HDR_SIZE);
+
+ /* Write a notice to the headers of archived log
+ files that the file write has been completed */
+
+ log_group_archive_completed_header_write(group, i, end_lsn);
+ }
+
+ fil_space_truncate_start(group->archive_space_id,
+ trunc_files * group->file_size);
+
+ if (log_debug_writes) {
+ printf("Archiving writes completed\n");
+ }
+}
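+
+/* The start_lsn computation above walks backward from
+next_archived_lsn over the data bytes now present in the archive space:
+for a partially filled last file, end_offset - LOG_FILE_HDR_SIZE data
+bytes in that file, plus file_size - LOG_FILE_HDR_SIZE data bytes in
+each of the trunc_files complete files that precede it. */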
+
+/**********************************************************
+Checks whether an archiving read or write phase has completed and, if so,
+advances the archiving operation to its next phase. */
+static
+void
+log_archive_check_completion_low(void)
+/*==================================*/
+{
+ ut_ad(mutex_own(&(log_sys->mutex)));
+
+ if (log_sys->n_pending_archive_ios == 0
+ && log_sys->archiving_phase == LOG_ARCHIVE_READ) {
+
+ if (log_debug_writes) {
+ printf("Archiving read completed\n");
+ }
+
+ /* Archive buffer has now been read in: start archive writes */
+
+ log_sys->archiving_phase = LOG_ARCHIVE_WRITE;
+
+ log_archive_groups();
+ }
+
+ if (log_sys->n_pending_archive_ios == 0
+ && log_sys->archiving_phase == LOG_ARCHIVE_WRITE) {
+
+ log_archive_write_complete_groups();
+
+ log_sys->archived_lsn = log_sys->next_archived_lsn;
+
+ rw_lock_x_unlock_gen(&(log_sys->archive_lock), LOG_ARCHIVE);
+ }
+}
+
+/**********************************************************
+Completes an archiving i/o. */
+static
+void
+log_io_complete_archive(void)
+/*=========================*/
+{
+ log_group_t* group;
+
+ mutex_enter(&(log_sys->mutex));
+
+ group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+ mutex_exit(&(log_sys->mutex));
+
+ fil_flush(group->archive_space_id);
+
+ mutex_enter(&(log_sys->mutex));
+
+ ut_ad(log_sys->n_pending_archive_ios > 0);
+
+ log_sys->n_pending_archive_ios--;
+
+ log_archive_check_completion_low();
+
+ mutex_exit(&(log_sys->mutex));
+}
+
+/************************************************************************
+Starts an archiving operation. */
+
+ibool
+log_archive_do(
+/*===========*/
+			/* out: TRUE if success, FALSE if an archiving
+ operation was already running */
+ ibool sync, /* in: TRUE if synchronous operation is desired */
+ ulint* n_bytes)/* out: archive log buffer size, 0 if nothing to
+ archive */
+{
+ ibool calc_new_limit;
+ dulint start_lsn;
+ dulint limit_lsn;
+
+ calc_new_limit = TRUE;
+loop:
+ mutex_enter(&(log_sys->mutex));
+
+ if (log_sys->archiving_state == LOG_ARCH_OFF) {
+ mutex_exit(&(log_sys->mutex));
+
+ *n_bytes = 0;
+
+ return(TRUE);
+
+ } else if (log_sys->archiving_state == LOG_ARCH_STOPPED
+ || log_sys->archiving_state == LOG_ARCH_STOPPING2) {
+
+ mutex_exit(&(log_sys->mutex));
+
+ os_event_wait(log_sys->archiving_on);
+
+ mutex_enter(&(log_sys->mutex));
+
+ goto loop;
+ }
+
+ start_lsn = log_sys->archived_lsn;
+
+ if (calc_new_limit) {
+ ut_ad(log_sys->archive_buf_size % OS_FILE_LOG_BLOCK_SIZE == 0);
+
+ limit_lsn = ut_dulint_add(start_lsn,
+ log_sys->archive_buf_size);
+
+ *n_bytes = log_sys->archive_buf_size;
+
+ if (ut_dulint_cmp(limit_lsn, log_sys->lsn) >= 0) {
+
+ limit_lsn = ut_dulint_align_down(log_sys->lsn,
+ OS_FILE_LOG_BLOCK_SIZE);
+ }
+ }
+
+ if (ut_dulint_cmp(log_sys->archived_lsn, limit_lsn) >= 0) {
+
+ mutex_exit(&(log_sys->mutex));
+
+ *n_bytes = 0;
+
+ return(TRUE);
+ }
+
+ if (ut_dulint_cmp(log_sys->written_to_all_lsn, limit_lsn) < 0) {
+
+ mutex_exit(&(log_sys->mutex));
+
+ log_flush_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS);
+
+ calc_new_limit = FALSE;
+
+ goto loop;
+ }
+
+ if (log_sys->n_pending_archive_ios > 0) {
+ /* An archiving operation is running */
+
+ mutex_exit(&(log_sys->mutex));
+
+ if (sync) {
+ rw_lock_s_lock(&(log_sys->archive_lock));
+ rw_lock_s_unlock(&(log_sys->archive_lock));
+ }
+
+ *n_bytes = log_sys->archive_buf_size;
+
+ return(FALSE);
+ }
+
+ rw_lock_x_lock_gen(&(log_sys->archive_lock), LOG_ARCHIVE);
+
+ log_sys->archiving_phase = LOG_ARCHIVE_READ;
+
+ log_sys->next_archived_lsn = limit_lsn;
+
+ if (log_debug_writes) {
+ printf("Archiving from lsn %lu %lu to lsn %lu %lu\n",
+ ut_dulint_get_high(log_sys->archived_lsn),
+ ut_dulint_get_low(log_sys->archived_lsn),
+ ut_dulint_get_high(limit_lsn),
+ ut_dulint_get_low(limit_lsn));
+ }
+
+ /* Read the log segment to the archive buffer */
+
+ log_group_read_log_seg(LOG_ARCHIVE, log_sys->archive_buf,
+ UT_LIST_GET_FIRST(log_sys->log_groups),
+ start_lsn, limit_lsn);
+
+ mutex_exit(&(log_sys->mutex));
+
+ if (sync) {
+ rw_lock_s_lock(&(log_sys->archive_lock));
+ rw_lock_s_unlock(&(log_sys->archive_lock));
+ }
+
+ *n_bytes = log_sys->archive_buf_size;
+
+ return(TRUE);
+}
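+
+/* Editor's usage sketch: a caller which must have the log archived up to
+the current lsn before proceeding could loop in the style of
+log_archive_all() below:
+
+	ulint	n_bytes;
+
+	while (!log_archive_do(TRUE, &n_bytes)) {
+	}
+
+With sync == TRUE each call waits on archive_lock until the pending
+archiving operation has completed, so the loop retries only after a
+concurrent operation has finished. */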
+
+/********************************************************************
+Writes the log contents to the archive at least up to the lsn when this
+function was called. */
+static
+void
+log_archive_all(void)
+/*=================*/
+{
+ dulint present_lsn;
+ ulint dummy;
+
+ mutex_enter(&(log_sys->mutex));
+
+ if (log_sys->archiving_state == LOG_ARCH_OFF) {
+ mutex_exit(&(log_sys->mutex));
+
+ return;
+ }
+
+ present_lsn = log_sys->lsn;
+
+ mutex_exit(&(log_sys->mutex));
+
+ log_pad_current_log_block();
+
+ for (;;) {
+ mutex_enter(&(log_sys->mutex));
+
+ if (ut_dulint_cmp(present_lsn, log_sys->archived_lsn) <= 0) {
+
+ mutex_exit(&(log_sys->mutex));
+
+ return;
+ }
+
+ mutex_exit(&(log_sys->mutex));
+
+ log_archive_do(TRUE, &dummy);
+ }
+}
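+
+/* Editor's note: the log_pad_current_log_block() call above appears to be
+what guarantees termination: log_archive_do() archives only up to a log
+block boundary (limit_lsn is aligned down to OS_FILE_LOG_BLOCK_SIZE), so
+the current block is padded full first, moving the lsn to a block boundary
+at or above present_lsn. */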
+
+/*********************************************************
+Closes the possible open archive log file (currently, only of the first
+log group), and if it was open, increments the group file count by 2, if
+desired. */
+static
+void
+log_archive_close_groups(
+/*=====================*/
+ ibool increment_file_count) /* in: TRUE if we want to increment
+ the file count */
+{
+ log_group_t* group;
+ ulint trunc_len;
+
+ ut_ad(mutex_own(&(log_sys->mutex)));
+
+ group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+ trunc_len = UNIV_PAGE_SIZE
+ * fil_space_get_size(group->archive_space_id);
+
+ if (trunc_len > 0) {
+ ut_a(trunc_len == group->file_size);
+
+ /* Write a notice to the headers of archived log
+ files that the file write has been completed */
+
+ log_group_archive_completed_header_write(group,
+ 0, log_sys->archived_lsn);
+
+ fil_space_truncate_start(group->archive_space_id,
+ trunc_len);
+ if (increment_file_count) {
+ group->archived_offset = 0;
+ group->archived_file_no += 2;
+
+ if (log_debug_writes) {
+ printf(
+ "Incrementing arch file no to %lu in log group %lu\n",
+ group->archived_file_no, group->id);
+ }
+ }
+ }
+}
+
+/********************************************************************
+Writes the log contents to the archive up to the lsn when this function was
+called, and stops the archiving. When archiving is started again, the archived
+log file numbers start from 2 higher, so that the archiving will
+not write again to the archived log files which exist when this function
+returns. */
+
+ulint
+log_archive_stop(void)
+/*==================*/
+ /* out: DB_SUCCESS or DB_ERROR */
+{
+ ibool success;
+
+ mutex_enter(&(log_sys->mutex));
+
+ if (log_sys->archiving_state != LOG_ARCH_ON) {
+
+ mutex_exit(&(log_sys->mutex));
+
+ return(DB_ERROR);
+ }
+
+ log_sys->archiving_state = LOG_ARCH_STOPPING;
+
+ mutex_exit(&(log_sys->mutex));
+
+ log_archive_all();
+
+ mutex_enter(&(log_sys->mutex));
+
+ log_sys->archiving_state = LOG_ARCH_STOPPING2;
+ os_event_reset(log_sys->archiving_on);
+
+ mutex_exit(&(log_sys->mutex));
+
+ /* Wait for a possible archiving operation to end */
+
+ rw_lock_s_lock(&(log_sys->archive_lock));
+ rw_lock_s_unlock(&(log_sys->archive_lock));
+
+ mutex_enter(&(log_sys->mutex));
+
+ /* Close all archived log files, incrementing the file count by 2,
+ if appropriate */
+
+ log_archive_close_groups(TRUE);
+
+ mutex_exit(&(log_sys->mutex));
+
+ /* Make a checkpoint, so that if recovery is needed, the file numbers
+ of new archived log files will start from the right value */
+
+ success = FALSE;
+
+ while (!success) {
+ success = log_checkpoint(TRUE, TRUE);
+ }
+
+ mutex_enter(&(log_sys->mutex));
+
+ log_sys->archiving_state = LOG_ARCH_STOPPED;
+
+ mutex_exit(&(log_sys->mutex));
+
+ return(DB_SUCCESS);
+}
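+
+/* Editor's usage sketch: stopping and restarting archiving brackets a
+region where the existing archived log files are stable, e.g. while they
+are copied to backup media:
+
+	if (log_archive_stop() == DB_SUCCESS) {
+		... copy the archived log files somewhere safe ...
+		log_archive_start();
+	}
+
+Since the file count was incremented by 2 in log_archive_stop(), the files
+copied above are never written to again. */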
+
+/********************************************************************
+Starts again archiving which has been stopped. */
+
+ulint
+log_archive_start(void)
+/*===================*/
+ /* out: DB_SUCCESS or DB_ERROR */
+{
+ mutex_enter(&(log_sys->mutex));
+
+ if (log_sys->archiving_state != LOG_ARCH_STOPPED) {
+
+ mutex_exit(&(log_sys->mutex));
+
+ return(DB_ERROR);
+ }
+
+ log_sys->archiving_state = LOG_ARCH_ON;
+
+ os_event_set(log_sys->archiving_on);
+
+ mutex_exit(&(log_sys->mutex));
+
+ return(DB_SUCCESS);
+}
+
+/********************************************************************
+Stops archiving the log so that a gap may occur in the archived log files. */
+
+ulint
+log_archive_noarchivelog(void)
+/*==========================*/
+ /* out: DB_SUCCESS or DB_ERROR */
+{
+loop:
+ mutex_enter(&(log_sys->mutex));
+
+ if (log_sys->archiving_state == LOG_ARCH_STOPPED
+ || log_sys->archiving_state == LOG_ARCH_OFF) {
+
+ log_sys->archiving_state = LOG_ARCH_OFF;
+
+ os_event_set(log_sys->archiving_on);
+
+ mutex_exit(&(log_sys->mutex));
+
+ return(DB_SUCCESS);
+ }
+
+ mutex_exit(&(log_sys->mutex));
+
+ log_archive_stop();
+
+ os_thread_sleep(500000);
+
+ goto loop;
+}
+
+/********************************************************************
+Starts archiving the log again after it has been switched off with
+log_archive_noarchivelog(); the log written while archiving was off is not
+archived, so a gap may exist in the archived log files. */
+
+ulint
+log_archive_archivelog(void)
+/*========================*/
+ /* out: DB_SUCCESS or DB_ERROR */
+{
+ mutex_enter(&(log_sys->mutex));
+
+ if (log_sys->archiving_state == LOG_ARCH_OFF) {
+
+ log_sys->archiving_state = LOG_ARCH_ON;
+
+ log_sys->archived_lsn = ut_dulint_align_down(log_sys->lsn,
+ OS_FILE_LOG_BLOCK_SIZE);
+ mutex_exit(&(log_sys->mutex));
+
+ return(DB_SUCCESS);
+ }
+
+ mutex_exit(&(log_sys->mutex));
+
+ return(DB_ERROR);
+}
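+
+/* Editor's usage sketch: the pair above implements a runtime toggle.
+Switching archiving off and later back on is legal but leaves a gap:
+
+	log_archive_noarchivelog();
+	... log written here is never archived ...
+	log_archive_archivelog();
+
+because log_archive_archivelog() resets archived_lsn to the current lsn,
+aligned down to a log block boundary. */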
+
+/********************************************************************
+Tries to establish a big enough margin of free space in the log groups, such
+that a new log entry can be catenated without an immediate need for
+archiving. */
+static
+void
+log_archive_margin(void)
+/*====================*/
+{
+ log_t* log = log_sys;
+ ulint age;
+ ibool sync;
+ ulint dummy;
+loop:
+ mutex_enter(&(log->mutex));
+
+ if (log->archiving_state == LOG_ARCH_OFF) {
+ mutex_exit(&(log->mutex));
+
+ return;
+ }
+
+ age = ut_dulint_minus(log->lsn, log->archived_lsn);
+
+ if (age > log->max_archived_lsn_age) {
+
+ /* Archiving is urgent: we have to do synchronous i/o */
+
+ sync = TRUE;
+
+ } else if (age > log->max_archived_lsn_age_async) {
+
+ /* Archiving is not urgent: we do asynchronous i/o */
+
+ sync = FALSE;
+ } else {
+ /* No archiving required yet */
+
+ mutex_exit(&(log->mutex));
+
+ return;
+ }
+
+ mutex_exit(&(log->mutex));
+
+ log_archive_do(sync, &dummy);
+
+ if (sync) {
+ /* Check again that enough was written to the archive */
+
+ goto loop;
+ }
+}
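+
+/* Editor's worked example with made-up numbers: if
+max_archived_lsn_age_async were 16 MB and max_archived_lsn_age 32 MB, an
+unarchived age of 20 MB would start an asynchronous archiving operation and
+return at once, while an age of 40 MB would archive synchronously,
+repeating until the age falls to 32 MB or below, and finish with at most
+one asynchronous operation. */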
+
+/************************************************************************
+Checks that there is enough free space in the log to start a new query step.
+Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
+function may only be called if the calling thread owns no synchronization
+objects! */
+
+void
+log_check_margins(void)
+/*===================*/
+{
+loop:
+ log_flush_margin();
+
+ log_checkpoint_margin();
+
+ log_archive_margin();
+
+ mutex_enter(&(log_sys->mutex));
+
+ if (log_sys->check_flush_or_checkpoint) {
+
+ mutex_exit(&(log_sys->mutex));
+
+ goto loop;
+ }
+
+ mutex_exit(&(log_sys->mutex));
+}
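+
+/* Editor's usage sketch: a thread about to start a query step which
+modifies the database would call this while holding no latches:
+
+	log_check_margins();
+	... acquire latches and run the mini-transaction ...
+
+The loop above only exits once check_flush_or_checkpoint is clear, so on
+return the log has room for the new log entries. */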
+
+/**********************************************************
+Switches the database to the online backup state. */
+
+ulint
+log_switch_backup_state_on(void)
+/*============================*/
+ /* out: DB_SUCCESS or DB_ERROR */
+{
+ dulint backup_lsn;
+
+ mutex_enter(&(log_sys->mutex));
+
+ if (log_sys->online_backup_state) {
+
+ /* The database is already in that state */
+
+ mutex_exit(&(log_sys->mutex));
+
+ return(DB_ERROR);
+ }
+
+ log_sys->online_backup_state = TRUE;
+
+ backup_lsn = log_sys->lsn;
+
+ log_sys->online_backup_lsn = backup_lsn;
+
+ mutex_exit(&(log_sys->mutex));
+
+ /* log_checkpoint_and_mark_file_spaces(); */
+
+ return(DB_SUCCESS);
+}
+
+/**********************************************************
+Switches the online backup state off. */
+
+ulint
+log_switch_backup_state_off(void)
+/*=============================*/
+ /* out: DB_SUCCESS or DB_ERROR */
+{
+ mutex_enter(&(log_sys->mutex));
+
+ if (!log_sys->online_backup_state) {
+
+ /* The database is already out of the backup state */
+
+ mutex_exit(&(log_sys->mutex));
+
+ return(DB_ERROR);
+ }
+
+ log_sys->online_backup_state = FALSE;
+
+ mutex_exit(&(log_sys->mutex));
+
+ return(DB_SUCCESS);
+}
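+
+/* Editor's usage sketch: an online backup would bracket its file copies
+with the pair above:
+
+	log_switch_backup_state_on();
+	... copy the data files; online_backup_lsn records the lsn from
+	    which redo log must be applied to the copies ...
+	log_switch_backup_state_off();
+
+Note that the checkpoint call in log_switch_backup_state_on() is commented
+out in this version. */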
+
+/********************************************************************
+Makes a checkpoint at the latest lsn and writes it to the first page of
+each data file in the database, so that we know that the file spaces
+contain all modifications up to that lsn. This can only be called at
+database shutdown. This function also archives all of the log to the log
+archive. */
+
+void
+logs_empty_and_mark_files_at_shutdown(void)
+/*=======================================*/
+{
+ dulint lsn;
+ ulint arch_log_no;
+
+ fprintf(stderr, "Innobase: Starting shutdown...\n");
+
+ /* Wait until the master thread and all other operations are idle: our
+ algorithm only works if the server is idle at shutdown */
+loop:
+ os_thread_sleep(100000);
+
+ mutex_enter(&kernel_mutex);
+
+ if (trx_n_mysql_transactions > 0
+ || UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
+
+ mutex_exit(&kernel_mutex);
+
+ goto loop;
+ }
+
+ if (srv_n_threads_active[SRV_MASTER] != 0) {
+
+ mutex_exit(&kernel_mutex);
+
+ goto loop;
+ }
+
+ mutex_exit(&kernel_mutex);
+
+ mutex_enter(&(log_sys->mutex));
+
+ if (log_sys->n_pending_archive_ios
+ + log_sys->n_pending_checkpoint_writes
+ + log_sys->n_pending_writes > 0) {
+
+ mutex_exit(&(log_sys->mutex));
+
+ goto loop;
+ }
+
+ mutex_exit(&(log_sys->mutex));
+
+ if (!buf_pool_check_no_pending_io()) {
+
+ goto loop;
+ }
+
+ log_archive_all();
+
+ log_make_checkpoint_at(ut_dulint_max, TRUE);
+
+ mutex_enter(&(log_sys->mutex));
+
+ lsn = log_sys->lsn;
+
+ if (ut_dulint_cmp(lsn, log_sys->last_checkpoint_lsn) != 0
+ || (srv_log_archive_on
+ && ut_dulint_cmp(lsn,
+ ut_dulint_add(log_sys->archived_lsn, LOG_BLOCK_HDR_SIZE)) != 0)) {
+
+ mutex_exit(&(log_sys->mutex));
+
+ goto loop;
+ }
+
+ arch_log_no =
+ UT_LIST_GET_FIRST(log_sys->log_groups)->archived_file_no;
+
+ if (0 == UT_LIST_GET_FIRST(log_sys->log_groups)->archived_offset) {
+
+ arch_log_no--;
+ }
+
+ log_archive_close_groups(TRUE);
+
+ mutex_exit(&(log_sys->mutex));
+
+ fil_flush_file_spaces(FIL_TABLESPACE);
+ fil_flush_file_spaces(FIL_LOG);
+
+ /* The following fil_write_... will bypass the buffer pool: therefore
+ it is essential that the buffer pool has been completely flushed
+ to disk! */
+
+ if (!buf_all_freed()) {
+
+ goto loop;
+ }
+
+ fil_write_flushed_lsn_to_data_files(lsn, arch_log_no);
+
+ fil_flush_file_spaces(FIL_TABLESPACE);
+
+ fprintf(stderr, "Innobase: Shutdown completed\n");
+}
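+
+/* Editor's note: the loop above enforces, in order, that there are no
+active transactions, the master thread is idle, no log, checkpoint or
+archive i/os are pending, no buffer pool i/os are pending, the log is fully
+archived and checkpointed, and the buffer pool is completely flushed; only
+then is the flushed lsn stamped into the data file headers. */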
+
+/**********************************************************
+Checks, by parsing it, that the log segment catenated for a single mtr is
+consistent. */
+
+ibool
+log_check_log_recs(
+/*===============*/
+ byte* buf, /* in: pointer to the start of the log segment
+ in the log_sys->buf log buffer */
+ ulint len, /* in: segment length in bytes */
+ dulint buf_start_lsn) /* in: buffer start lsn */
+{
+ dulint contiguous_lsn;
+ dulint scanned_lsn;
+ byte* start;
+ byte* end;
+ byte* buf1;
+ byte* scan_buf;
+
+ ut_ad(mutex_own(&(log_sys->mutex)));
+
+ if (len == 0) {
+
+ return(TRUE);
+ }
+
+ start = ut_align_down(buf, OS_FILE_LOG_BLOCK_SIZE);
+ end = ut_align(buf + len, OS_FILE_LOG_BLOCK_SIZE);
+
+ buf1 = mem_alloc((end - start) + OS_FILE_LOG_BLOCK_SIZE);
+ scan_buf = ut_align(buf1, OS_FILE_LOG_BLOCK_SIZE);
+
+ ut_memcpy(scan_buf, start, end - start);
+
+ recv_scan_log_recs(FALSE, scan_buf, end - start,
+ ut_dulint_align_down(buf_start_lsn,
+ OS_FILE_LOG_BLOCK_SIZE),
+ &contiguous_lsn, &scanned_lsn);
+
+ ut_a(ut_dulint_cmp(scanned_lsn, ut_dulint_add(buf_start_lsn, len))
+ == 0);
+ ut_a(ut_dulint_cmp(recv_sys->recovered_lsn, scanned_lsn) == 0);
+
+ mem_free(buf1);
+
+ return(TRUE);
+}
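+
+/* Editor's usage sketch: this is a debug consistency check; a caller which
+has just catenated the log records of one mtr could assert on it while
+still holding the log mutex:
+
+	ut_a(log_check_log_recs(log_sys->buf + old_buf_free,
+				log_sys->buf_free - old_buf_free,
+				old_lsn));
+
+where old_buf_free and old_lsn are hypothetical names for the buffer offset
+and lsn recorded before the catenation. */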
+
+/**********************************************************
+Prints info of the log. */
+
+void
+log_print(void)
+/*===========*/
+{
+ printf("Log sequence number %lu %lu\n",
+ ut_dulint_get_high(log_sys->lsn),
+ ut_dulint_get_low(log_sys->lsn));
+}
+