diff options
author | Bob Van Landuyt <bob@gitlab.com> | 2019-08-13 20:52:01 +0000 |
---|---|---|
committer | Douwe Maan <douwe@gitlab.com> | 2019-08-13 20:52:01 +0000 |
commit | 452bc36d603ed89d3fa5e3409338dd905230bd2f (patch) | |
tree | 3ef260430db93ef2b9fa9236ea601a0b3e53adee /app/models/remote_mirror.rb | |
parent | 1c3b570c117cc41f5f4838a8366c4367ef0749cb (diff) | |
download | gitlab-ce-452bc36d603ed89d3fa5e3409338dd905230bd2f.tar.gz |
Rework retry strategy for remote mirrors
**Prevention of running 2 simultaneous updates**
Instead of using `RemoteMirror#update_status` and raise an error if
it's already running to prevent the same mirror being updated at the
same time we now use `Gitlab::ExclusiveLease` for that.
When we fail to obtain a lease in 3 tries, 30 seconds apart, we bail
and reschedule. We'll reschedule faster for the protected branches.
If the mirror already ran since it was scheduled, the job will be
skipped.
**Error handling: Remote side**
When an update fails because of a `Gitlab::Git::CommandError`, we
won't track this error in sentry, this could be on the remote side:
for example when branches have diverged.
In this case, we'll try 3 times scheduled 1 or 5 minutes apart.
In between, the mirror is marked as "to_retry", the error would be
visible to the user when they visit the settings page.
After 3 tries we'll mark the mirror as failed and notify the user.
We won't track this error in sentry, as it's not likely we can help
it.
The next event that would trigger a new refresh.
**Error handling: our side**
If an unexpected error occurs, we mark the mirror as failed, but we'd
still retry the job based on the regular sidekiq retries with
backoff. Same as we used to
The error would be reported in sentry, since its likely we need to do
something about it.
Diffstat (limited to 'app/models/remote_mirror.rb')
-rw-r--r-- | app/models/remote_mirror.rb | 57 |
1 files changed, 43 insertions, 14 deletions
diff --git a/app/models/remote_mirror.rb b/app/models/remote_mirror.rb index 6b5605f9999..c9ee0653d86 100644 --- a/app/models/remote_mirror.rb +++ b/app/models/remote_mirror.rb @@ -4,6 +4,8 @@ class RemoteMirror < ApplicationRecord include AfterCommitQueue include MirrorAuthentication + MAX_FIRST_RUNTIME = 3.hours + MAX_INCREMENTAL_RUNTIME = 1.hour PROTECTED_BACKOFF_DELAY = 1.minute UNPROTECTED_BACKOFF_DELAY = 5.minutes @@ -31,11 +33,18 @@ class RemoteMirror < ApplicationRecord scope :enabled, -> { where(enabled: true) } scope :started, -> { with_update_status(:started) } - scope :stuck, -> { started.where('last_update_at < ? OR (last_update_at IS NULL AND updated_at < ?)', 1.hour.ago, 3.hours.ago) } + + scope :stuck, -> do + started + .where('(last_update_started_at < ? AND last_update_at IS NOT NULL)', + MAX_INCREMENTAL_RUNTIME.ago) + .or(where('(last_update_started_at < ? AND last_update_at IS NULL)', + MAX_FIRST_RUNTIME.ago)) + end state_machine :update_status, initial: :none do event :update_start do - transition [:none, :finished, :failed] => :started + transition any => :started end event :update_finish do @@ -46,9 +55,14 @@ class RemoteMirror < ApplicationRecord transition started: :failed end + event :update_retry do + transition started: :to_retry + end + state :started state :finished state :failed + state :to_retry after_transition any => :started do |remote_mirror, _| Gitlab::Metrics.add_event(:remote_mirrors_running) @@ -138,16 +152,27 @@ class RemoteMirror < ApplicationRecord end def updated_since?(timestamp) - last_update_started_at && last_update_started_at > timestamp && !update_failed? + return false if failed? + + last_update_started_at && last_update_started_at > timestamp end def mark_for_delete_if_blank_url mark_for_destruction if url.blank? end - def mark_as_failed(error_message) - update_column(:last_error, Gitlab::UrlSanitizer.sanitize(error_message)) - update_fail + def update_error_message(error_message) + self.last_error = Gitlab::UrlSanitizer.sanitize(error_message) + end + + def mark_for_retry!(error_message) + update_error_message(error_message) + update_retry! + end + + def mark_as_failed!(error_message) + update_error_message(error_message) + update_fail! end def url=(value) @@ -190,6 +215,18 @@ class RemoteMirror < ApplicationRecord update_column(:error_notification_sent, true) end + def backoff_delay + if self.only_protected_branches + PROTECTED_BACKOFF_DELAY + else + UNPROTECTED_BACKOFF_DELAY + end + end + + def max_runtime + last_update_at.present? ? MAX_INCREMENTAL_RUNTIME : MAX_FIRST_RUNTIME + end + private def store_credentials @@ -219,14 +256,6 @@ class RemoteMirror < ApplicationRecord self.last_update_started_at >= Time.now - backoff_delay end - def backoff_delay - if self.only_protected_branches - PROTECTED_BACKOFF_DELAY - else - UNPROTECTED_BACKOFF_DELAY - end - end - def reset_fields update_columns( last_error: nil, |