summaryrefslogtreecommitdiff
path: root/lib/gitlab/database/load_balancing
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gitlab/database/load_balancing')
-rw-r--r--lib/gitlab/database/load_balancing/load_balancer.rb15
-rw-r--r--lib/gitlab/database/load_balancing/rack_middleware.rb2
-rw-r--r--lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb26
-rw-r--r--lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb63
-rw-r--r--lib/gitlab/database/load_balancing/srv_resolver.rb2
-rw-r--r--lib/gitlab/database/load_balancing/sticking.rb16
6 files changed, 79 insertions, 45 deletions
diff --git a/lib/gitlab/database/load_balancing/load_balancer.rb b/lib/gitlab/database/load_balancing/load_balancer.rb
index a833bb8491f..a5d67ebc050 100644
--- a/lib/gitlab/database/load_balancing/load_balancer.rb
+++ b/lib/gitlab/database/load_balancing/load_balancer.rb
@@ -147,15 +147,15 @@ module Gitlab
raise 'Failed to determine the write location of the primary database'
end
- # Returns true if all hosts have caught up to the given transaction
- # write location.
- def all_caught_up?(location)
- @host_list.hosts.all? { |host| host.caught_up?(location) }
- end
-
# Returns true if there was at least one host that has caught up with the given transaction.
#
# In case of a retry, this method also stores the set of hosts that have caught up.
+ #
+ # UPD: `select_caught_up_hosts` seems to have redundant logic managing host list (`:gitlab_load_balancer_valid_hosts`),
+ # while we only need a single host: https://gitlab.com/gitlab-org/gitlab/-/issues/326125#note_615271604
+ # Also, shuffling the list afterwards doesn't seem to be necessary.
+ # This may be improved by merging this method with `select_up_to_date_host`.
+ # Could be removed when `:load_balancing_refine_load_balancer_methods` FF is rolled out
def select_caught_up_hosts(location)
all_hosts = @host_list.hosts
valid_hosts = all_hosts.select { |host| host.caught_up?(location) }
@@ -179,6 +179,8 @@ module Gitlab
# Returns true if there was at least one host that has caught up with the given transaction.
# Similar to `#select_caught_up_hosts`, this method picks a random host, to rotate the replicas we use.
# Unlike `#select_caught_up_hosts`, it does not iterate over all hosts if it finds one that has caught up.
+      #
+      # It is going to be merged with `select_caught_up_hosts`, because the two methods are intended to do the same thing.
def select_up_to_date_host(location)
all_hosts = @host_list.hosts.shuffle
host = all_hosts.find { |host| host.caught_up?(location) }
@@ -190,6 +192,7 @@ module Gitlab
true
end
+ # Could be removed when `:load_balancing_refine_load_balancer_methods` FF is rolled out
def set_consistent_hosts_for_request(hosts)
RequestStore[VALID_HOSTS_CACHE_KEY] = hosts
end
diff --git a/lib/gitlab/database/load_balancing/rack_middleware.rb b/lib/gitlab/database/load_balancing/rack_middleware.rb
index 4734ff99bd3..8e7e6865402 100644
--- a/lib/gitlab/database/load_balancing/rack_middleware.rb
+++ b/lib/gitlab/database/load_balancing/rack_middleware.rb
@@ -39,6 +39,8 @@ module Gitlab
result = @app.call(env)
+ ActiveSupport::Notifications.instrument('web_transaction_completed.load_balancing')
+
stick_if_necessary(env)
result
diff --git a/lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb b/lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb
index 524d69c00c0..0e36ebbc3ee 100644
--- a/lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb
+++ b/lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb
@@ -5,27 +5,29 @@ module Gitlab
module LoadBalancing
class SidekiqClientMiddleware
def call(worker_class, job, _queue, _redis_pool)
+ # Mailers can't be constantized
worker_class = worker_class.to_s.safe_constantize
- mark_data_consistency_location(worker_class, job)
+ if load_balancing_enabled?(worker_class)
+ job['worker_data_consistency'] = worker_class.get_data_consistency
+ set_data_consistency_location!(job) unless location_already_provided?(job)
+ else
+ job['worker_data_consistency'] = ::WorkerAttributes::DEFAULT_DATA_CONSISTENCY
+ end
yield
end
private
- def mark_data_consistency_location(worker_class, job)
- # Mailers can't be constantized
- return unless worker_class
- return unless worker_class.include?(::ApplicationWorker)
- return unless worker_class.get_data_consistency_feature_flag_enabled?
-
- return if location_already_provided?(job)
-
- job['worker_data_consistency'] = worker_class.get_data_consistency
-
- return unless worker_class.utilizes_load_balancing_capabilities?
+ def load_balancing_enabled?(worker_class)
+ worker_class &&
+ worker_class.include?(::ApplicationWorker) &&
+ worker_class.utilizes_load_balancing_capabilities? &&
+ worker_class.get_data_consistency_feature_flag_enabled?
+ end
+ def set_data_consistency_location!(job)
if Session.current.use_primary?
job['database_write_location'] = load_balancer.primary_write_location
else
diff --git a/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb b/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb
index 9bd0adf8dbd..0551750568a 100644
--- a/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb
+++ b/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb
@@ -7,8 +7,18 @@ module Gitlab
JobReplicaNotUpToDate = Class.new(StandardError)
def call(worker, job, _queue)
- if requires_primary?(worker.class, job)
+ worker_class = worker.class
+ strategy = select_load_balancing_strategy(worker_class, job)
+
+ job['load_balancing_strategy'] = strategy.to_s
+
+ if use_primary?(strategy)
Session.current.use_primary!
+ elsif strategy == :retry
+ raise JobReplicaNotUpToDate, "Sidekiq job #{worker_class} JID-#{job['jid']} couldn't use the replica."\
+ " Replica was not up to date."
+ else
+            # This means we selected an up-to-date replica, so there is nothing more to do in this case.
end
yield
@@ -23,31 +33,42 @@ module Gitlab
Session.clear_session
end
- def requires_primary?(worker_class, job)
- return true unless worker_class.include?(::ApplicationWorker)
- return true unless worker_class.utilizes_load_balancing_capabilities?
- return true unless worker_class.get_data_consistency_feature_flag_enabled?
-
- location = job['database_write_location'] || job['database_replica_location']
+ def use_primary?(strategy)
+ strategy.start_with?('primary')
+ end
- return true unless location
+ def select_load_balancing_strategy(worker_class, job)
+ return :primary unless load_balancing_available?(worker_class)
- job_data_consistency = worker_class.get_data_consistency
- job[:data_consistency] = job_data_consistency.to_s
+ location = job['database_write_location'] || job['database_replica_location']
+ return :primary_no_wal unless location
if replica_caught_up?(location)
- job[:database_chosen] = 'replica'
- false
- elsif job_data_consistency == :delayed && not_yet_retried?(job)
- job[:database_chosen] = 'retry'
- raise JobReplicaNotUpToDate, "Sidekiq job #{worker_class} JID-#{job['jid']} couldn't use the replica."\
- " Replica was not up to date."
+ # Happy case: we can read from a replica.
+ retried_before?(worker_class, job) ? :replica_retried : :replica
+ elsif can_retry?(worker_class, job)
+ # Optimistic case: The worker allows retries and we have retries left.
+ :retry
else
- job[:database_chosen] = 'primary'
- true
+ # Sad case: we need to fall back to the primary.
+ :primary
end
end
+ def load_balancing_available?(worker_class)
+ worker_class.include?(::ApplicationWorker) &&
+ worker_class.utilizes_load_balancing_capabilities? &&
+ worker_class.get_data_consistency_feature_flag_enabled?
+ end
+
+ def can_retry?(worker_class, job)
+ worker_class.get_data_consistency == :delayed && not_yet_retried?(job)
+ end
+
+ def retried_before?(worker_class, job)
+ worker_class.get_data_consistency == :delayed && !not_yet_retried?(job)
+ end
+
def not_yet_retried?(job)
# if `retry_count` is `nil` it indicates that this job was never retried
# the `0` indicates that this is a first retry
@@ -59,11 +80,7 @@ module Gitlab
end
def replica_caught_up?(location)
- if Feature.enabled?(:sidekiq_load_balancing_rotate_up_to_date_replica)
- load_balancer.select_up_to_date_host(location)
- else
- load_balancer.host.caught_up?(location)
- end
+ load_balancer.select_up_to_date_host(location)
end
end
end
diff --git a/lib/gitlab/database/load_balancing/srv_resolver.rb b/lib/gitlab/database/load_balancing/srv_resolver.rb
index 20da525f4d2..1f599ef4a27 100644
--- a/lib/gitlab/database/load_balancing/srv_resolver.rb
+++ b/lib/gitlab/database/load_balancing/srv_resolver.rb
@@ -1,5 +1,7 @@
# frozen_string_literal: true
+require 'net/dns'
+
module Gitlab
module Database
module LoadBalancing
diff --git a/lib/gitlab/database/load_balancing/sticking.rb b/lib/gitlab/database/load_balancing/sticking.rb
index efbd7099300..8e1aa079216 100644
--- a/lib/gitlab/database/load_balancing/sticking.rb
+++ b/lib/gitlab/database/load_balancing/sticking.rb
@@ -33,8 +33,10 @@ module Gitlab
return true unless location
- load_balancer.all_caught_up?(location).tap do |caught_up|
- unstick(namespace, id) if caught_up
+ load_balancer.select_up_to_date_host(location).tap do |found|
+ ActiveSupport::Notifications.instrument('caught_up_replica_pick.load_balancing', { result: found } )
+
+ unstick(namespace, id) if found
end
end
@@ -51,8 +53,14 @@ module Gitlab
# write location. If no such location exists, err on the side of caution.
return false unless location
- load_balancer.select_caught_up_hosts(location).tap do |selected|
- unstick(namespace, id) if selected
+ if ::Feature.enabled?(:load_balancing_refine_load_balancer_methods)
+ load_balancer.select_up_to_date_host(location).tap do |selected|
+ unstick(namespace, id) if selected
+ end
+ else
+ load_balancer.select_caught_up_hosts(location).tap do |selected|
+ unstick(namespace, id) if selected
+ end
end
end