diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2018-06-07 14:29:35 +0300 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2018-06-07 14:29:35 +0300 |
commit | dc9c555415b1d37b713839b9f8143a053f402c0e (patch) | |
tree | ad8233c842be126bce5d4d0dc08e5df7036b0bc6 /extra | |
parent | 619c277a6cc11b62e9d84449c0ee8ab39abc54f3 (diff) | |
download | mariadb-git-dc9c555415b1d37b713839b9f8143a053f402c0e.tar.gz |
MDEV-16367 mariabackup: error: failed to copy enough redo log
log_copying_thread(): Keep copying redo log until the end has been
reached. (Previously, we would stop copying as soon as
the first batch of xtrabackup_copy_logfile() returned.)
log_copying: Remove. Use log_copying_running instead.
copy_logfile: Remove. Log copying will now only be invoked from
2 places: from xtrabackup_backup_func() for the initial batch,
and from log_copying_thread() until all of the log has been read.
Use the global variable metadata_to_lsn for determining if the
final part of the log is being copied.
xtrabackup_copy_log(): Add diagnostic messages for terminating
the copying. These messages should be dead code, because
log_group_read_log_seg() should be checking for the same.
xtrabackup_copy_logfile(): Correct the retrying logic.
If anything was successfully read, process the portion that
was read. On failure, let the caller close dst_log_file.
io_watching_thread(): Stop throttling during the last phase
of copying the log (metadata_to_lsn!=0). The final copying
of the log will now be performed in log_copying_thread().
stop_backup_threads(): Clean up the message about stopping
the log copying thread.
xtrabackup_backup_low(): Read metadata_to_lsn from the latest
checkpoint header page, even if it is the first page.
Let the log_copying_thread take care of copying all of
the redo log.
Diffstat (limited to 'extra')
-rw-r--r-- | extra/mariabackup/xtrabackup.cc | 113 |
1 files changed, 56 insertions, 57 deletions
diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index e14e65d1679..65fddedc61a 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -172,7 +172,6 @@ typedef struct xb_filter_entry_struct xb_filter_entry_t; lsn_t checkpoint_lsn_start; lsn_t checkpoint_no_start; static lsn_t log_copy_scanned_lsn; -static bool log_copying; static bool log_copying_running; static bool io_watching_thread_running; @@ -2452,25 +2451,12 @@ skip: return(FALSE); } -/** How to copy a redo log segment in backup */ -enum copy_logfile { - /** Initial copying: copy at least one block */ - COPY_FIRST, - /** Tracking while copying data files */ - COPY_ONLINE, - /** Final copying: copy until the end of the log */ - COPY_LAST -}; - /** Copy redo log blocks to the data sink. -@param[in] copy how to copy the log -@param[in] start_lsn buffer start LSN -@param[in] end_lsn buffer end LSN -@return last scanned LSN (equals to last copied LSN if copy=COPY_LAST) +@param start_lsn buffer start LSN +@param end_lsn buffer end LSN +@return last scanned LSN @retval 0 on failure */ -static -lsn_t -xtrabackup_copy_log(copy_logfile copy, lsn_t start_lsn, lsn_t end_lsn) +static lsn_t xtrabackup_copy_log(lsn_t start_lsn, lsn_t end_lsn) { lsn_t scanned_lsn = start_lsn; const byte* log_block = log_sys->buf; @@ -2485,6 +2471,9 @@ xtrabackup_copy_log(copy_logfile copy, lsn_t start_lsn, lsn_t end_lsn) && scanned_checkpoint - checkpoint >= 0x80000000UL) { /* Garbage from a log buffer flush which was made before the most recent database recovery */ + msg("mariabackup: checkpoint wrap: " + LSN_PF ",%zx,%zx\n", + scanned_lsn, scanned_checkpoint, checkpoint); break; } @@ -2505,6 +2494,8 @@ xtrabackup_copy_log(copy_logfile copy, lsn_t start_lsn, lsn_t end_lsn) >= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE || data_len <= LOG_BLOCK_HDR_SIZE) { /* We got a garbage block (abrupt end of the log). */ + msg("mariabackup: garbage block: " LSN_PF ",%zu\n", + scanned_lsn, data_len); break; } else { /* We got a partial block (abrupt end of the log). */ @@ -2524,7 +2515,7 @@ xtrabackup_copy_log(copy_logfile copy, lsn_t start_lsn, lsn_t end_lsn) log_sys->log.scanned_lsn = scanned_lsn; - end_lsn = copy == COPY_LAST + end_lsn = metadata_to_lsn ? ut_uint64_align_up(scanned_lsn, OS_FILE_LOG_BLOCK_SIZE) : scanned_lsn & ~lsn_t(OS_FILE_LOG_BLOCK_SIZE - 1); @@ -2544,10 +2535,8 @@ xtrabackup_copy_log(copy_logfile copy, lsn_t start_lsn, lsn_t end_lsn) } /** Copy redo log until the current end of the log is reached -@param copy how to copy the log @return whether the operation failed */ -static bool -xtrabackup_copy_logfile(copy_logfile copy) +static bool xtrabackup_copy_logfile() { ut_a(dst_log_file != NULL); ut_ad(recv_sys != NULL); @@ -2560,33 +2549,29 @@ xtrabackup_copy_logfile(copy_logfile copy) start_lsn = ut_uint64_align_down(log_copy_scanned_lsn, OS_FILE_LOG_BLOCK_SIZE); - /* When copying the first or last part of the log, retry a few - times to ensure that all log up to the last checkpoint will be - read. */ do { end_lsn = start_lsn + RECV_SCAN_SIZE; xtrabackup_io_throttling(); log_mutex_enter(); - lsn_t lsn= start_lsn; - for(int retries= 0; retries < 100; retries++) { + for (int retries= 0; retries < 100; retries++) { if (log_group_read_log_seg(log_sys->buf, &log_sys->log, - &lsn, end_lsn)){ + &lsn, end_lsn) + || lsn != start_lsn) { break; } - msg("Retrying read of a redo log block"); + msg("Retrying read of log at LSN=" LSN_PF "\n", lsn); my_sleep(1000); } - start_lsn = xtrabackup_copy_log(copy, start_lsn, lsn); + start_lsn = (lsn == start_lsn) + ? 0 : xtrabackup_copy_log(start_lsn, lsn); log_mutex_exit(); if (!start_lsn) { - ds_close(dst_log_file); - dst_log_file = NULL; msg("mariabackup: Error: xtrabackup_copy_logfile()" " failed.\n"); return(true); @@ -2612,12 +2597,23 @@ static os_thread_ret_t log_copying_thread(void*) */ my_thread_init(); - do { + for (;;) { os_event_reset(log_copying_stop); os_event_wait_time_low(log_copying_stop, xtrabackup_log_copy_interval * 1000ULL, 0); - } while (log_copying && xtrabackup_copy_logfile(COPY_ONLINE)); + if (xtrabackup_copy_logfile()) { + break; + } + + log_mutex_enter(); + bool completed = metadata_to_lsn + && metadata_to_lsn < log_copy_scanned_lsn; + log_mutex_exit(); + if (completed) { + break; + } + } log_copying_running = false; my_thread_end(); @@ -2632,7 +2628,7 @@ static os_thread_ret_t io_watching_thread(void*) /* currently, for --backup only */ ut_a(xtrabackup_backup); - while (log_copying) { + while (log_copying_running && !metadata_to_lsn) { os_thread_sleep(1000000); /*1 sec*/ io_ticket = xtrabackup_throttle; os_event_set(wait_throttle); @@ -3662,16 +3658,16 @@ end: static void stop_backup_threads() { - log_copying = false; - - if (log_copying_stop) { + if (log_copying_stop && log_copying_running) { os_event_set(log_copying_stop); - msg("mariabackup: Stopping log copying thread.\n"); + fputs("mariabackup: Stopping log copying thread", stderr); + fflush(stderr); while (log_copying_running) { - msg("."); + putc('.', stderr); + fflush(stderr); os_thread_sleep(200000); /*0.2 sec*/ } - msg("\n"); + putc('\n', stderr); os_event_destroy(log_copying_stop); } @@ -3686,10 +3682,10 @@ static void stop_backup_threads() /** Implement the core of --backup @return whether the operation succeeded */ -static -bool -xtrabackup_backup_low() +static bool xtrabackup_backup_low() { + ut_ad(!metadata_to_lsn); + /* read the latest checkpoint lsn */ { ulint max_cp_field; @@ -3698,13 +3694,16 @@ xtrabackup_backup_low() if (recv_find_max_checkpoint(&max_cp_field) == DB_SUCCESS && log_sys->log.format != 0) { + if (max_cp_field == LOG_CHECKPOINT_1) { + log_group_header_read(&log_sys->log, + max_cp_field); + } metadata_to_lsn = mach_read_from_8( log_sys->checkpoint_buf + LOG_CHECKPOINT_LSN); msg("mariabackup: The latest check point" " (for incremental): '" LSN_PF "'\n", metadata_to_lsn); } else { - metadata_to_lsn = 0; msg("mariabackup: Error: recv_find_max_checkpoint() failed.\n"); } log_mutex_exit(); @@ -3712,11 +3711,7 @@ xtrabackup_backup_low() stop_backup_threads(); - if (!dst_log_file || xtrabackup_copy_logfile(COPY_LAST)) { - return false; - } - - if (ds_close(dst_log_file)) { + if (ds_close(dst_log_file) || !metadata_to_lsn) { dst_log_file = NULL; return false; } @@ -3795,6 +3790,7 @@ xtrabackup_backup_func() srv_read_only_mode = TRUE; srv_operation = SRV_OPERATION_BACKUP; + metadata_to_lsn = 0; if (xb_close_files) msg("mariabackup: warning: close-files specified. Use it " @@ -3805,7 +3801,12 @@ xtrabackup_backup_func() /* initialize components */ if(innodb_init_param()) { fail: + metadata_to_lsn = log_copying_running; stop_backup_threads(); + if (dst_log_file) { + ds_close(dst_log_file); + dst_log_file = NULL; + } if (fil_system) { innodb_shutdown(); } @@ -3814,7 +3815,6 @@ fail: xb_normalize_init_values(); - if (srv_file_flush_method_str == NULL) { /* These are the default options */ srv_file_flush_method = SRV_FSYNC; @@ -4058,9 +4058,7 @@ reread_log_header: goto log_write_fail; } - /* start flag */ - log_copying = TRUE; - + log_copying_running = true; /* start io throttle */ if(xtrabackup_throttle) { os_thread_id_t io_watching_thread_id; @@ -4078,6 +4076,8 @@ reread_log_header: if (err != DB_SUCCESS) { msg("mariabackup: error: xb_load_tablespaces() failed with" " error %s.\n", ut_strerr(err)); +fail_before_log_copying_thread_start: + log_copying_running = false; goto fail; } @@ -4085,11 +4085,10 @@ reread_log_header: log_copy_scanned_lsn = checkpoint_lsn_start; recv_sys->recovered_lsn = log_copy_scanned_lsn; - if (xtrabackup_copy_logfile(COPY_FIRST)) - goto fail; + if (xtrabackup_copy_logfile()) + goto fail_before_log_copying_thread_start; log_copying_stop = os_event_create(0); - log_copying_running = true; os_thread_create(log_copying_thread, NULL, &log_copying_thread_id); /* FLUSH CHANGED_PAGE_BITMAPS call */ |