diff options
Diffstat (limited to 'lib/gitlab/database/load_balancing')
6 files changed, 79 insertions, 45 deletions
diff --git a/lib/gitlab/database/load_balancing/load_balancer.rb b/lib/gitlab/database/load_balancing/load_balancer.rb index a833bb8491f..a5d67ebc050 100644 --- a/lib/gitlab/database/load_balancing/load_balancer.rb +++ b/lib/gitlab/database/load_balancing/load_balancer.rb @@ -147,15 +147,15 @@ module Gitlab raise 'Failed to determine the write location of the primary database' end - # Returns true if all hosts have caught up to the given transaction - # write location. - def all_caught_up?(location) - @host_list.hosts.all? { |host| host.caught_up?(location) } - end - # Returns true if there was at least one host that has caught up with the given transaction. # # In case of a retry, this method also stores the set of hosts that have caught up. + # + # UPD: `select_caught_up_hosts` seems to have redundant logic managing host list (`:gitlab_load_balancer_valid_hosts`), + # while we only need a single host: https://gitlab.com/gitlab-org/gitlab/-/issues/326125#note_615271604 + # Also, shuffling the list afterwards doesn't seem to be necessary. + # This may be improved by merging this method with `select_up_to_date_host`. + # Could be removed when `:load_balancing_refine_load_balancer_methods` FF is rolled out def select_caught_up_hosts(location) all_hosts = @host_list.hosts valid_hosts = all_hosts.select { |host| host.caught_up?(location) } @@ -179,6 +179,8 @@ module Gitlab # Returns true if there was at least one host that has caught up with the given transaction. # Similar to `#select_caught_up_hosts`, picks a random host, to rotate replicas we use. # Unlike `#select_caught_up_hosts`, does not iterate over all hosts if finds any. + # + # It is going to be merged with `select_caught_up_hosts`, because they intend to do the same. def select_up_to_date_host(location) all_hosts = @host_list.hosts.shuffle host = all_hosts.find { |host| host.caught_up?(location) } @@ -190,6 +192,7 @@ module Gitlab true end + # Could be removed when `:load_balancing_refine_load_balancer_methods` FF is rolled out def set_consistent_hosts_for_request(hosts) RequestStore[VALID_HOSTS_CACHE_KEY] = hosts end diff --git a/lib/gitlab/database/load_balancing/rack_middleware.rb b/lib/gitlab/database/load_balancing/rack_middleware.rb index 4734ff99bd3..8e7e6865402 100644 --- a/lib/gitlab/database/load_balancing/rack_middleware.rb +++ b/lib/gitlab/database/load_balancing/rack_middleware.rb @@ -39,6 +39,8 @@ module Gitlab result = @app.call(env) + ActiveSupport::Notifications.instrument('web_transaction_completed.load_balancing') + stick_if_necessary(env) result diff --git a/lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb b/lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb index 524d69c00c0..0e36ebbc3ee 100644 --- a/lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb +++ b/lib/gitlab/database/load_balancing/sidekiq_client_middleware.rb @@ -5,27 +5,29 @@ module Gitlab module LoadBalancing class SidekiqClientMiddleware def call(worker_class, job, _queue, _redis_pool) + # Mailers can't be constantized worker_class = worker_class.to_s.safe_constantize - mark_data_consistency_location(worker_class, job) + if load_balancing_enabled?(worker_class) + job['worker_data_consistency'] = worker_class.get_data_consistency + set_data_consistency_location!(job) unless location_already_provided?(job) + else + job['worker_data_consistency'] = ::WorkerAttributes::DEFAULT_DATA_CONSISTENCY + end yield end private - def mark_data_consistency_location(worker_class, job) - # Mailers can't be constantized - return unless worker_class - return unless worker_class.include?(::ApplicationWorker) - return unless worker_class.get_data_consistency_feature_flag_enabled? - - return if location_already_provided?(job) - - job['worker_data_consistency'] = worker_class.get_data_consistency - - return unless worker_class.utilizes_load_balancing_capabilities? + def load_balancing_enabled?(worker_class) + worker_class && + worker_class.include?(::ApplicationWorker) && + worker_class.utilizes_load_balancing_capabilities? && + worker_class.get_data_consistency_feature_flag_enabled? + end + def set_data_consistency_location!(job) if Session.current.use_primary? job['database_write_location'] = load_balancer.primary_write_location else diff --git a/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb b/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb index 9bd0adf8dbd..0551750568a 100644 --- a/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb +++ b/lib/gitlab/database/load_balancing/sidekiq_server_middleware.rb @@ -7,8 +7,18 @@ module Gitlab JobReplicaNotUpToDate = Class.new(StandardError) def call(worker, job, _queue) - if requires_primary?(worker.class, job) + worker_class = worker.class + strategy = select_load_balancing_strategy(worker_class, job) + + job['load_balancing_strategy'] = strategy.to_s + + if use_primary?(strategy) Session.current.use_primary! + elsif strategy == :retry + raise JobReplicaNotUpToDate, "Sidekiq job #{worker_class} JID-#{job['jid']} couldn't use the replica."\ + " Replica was not up to date." + else + # this means we selected an up-to-date replica, but there is nothing to do in this case. end yield @@ -23,31 +33,42 @@ module Gitlab Session.clear_session end - def requires_primary?(worker_class, job) - return true unless worker_class.include?(::ApplicationWorker) - return true unless worker_class.utilizes_load_balancing_capabilities? - return true unless worker_class.get_data_consistency_feature_flag_enabled? - - location = job['database_write_location'] || job['database_replica_location'] + def use_primary?(strategy) + strategy.start_with?('primary') + end - return true unless location + def select_load_balancing_strategy(worker_class, job) + return :primary unless load_balancing_available?(worker_class) - job_data_consistency = worker_class.get_data_consistency - job[:data_consistency] = job_data_consistency.to_s + location = job['database_write_location'] || job['database_replica_location'] + return :primary_no_wal unless location if replica_caught_up?(location) - job[:database_chosen] = 'replica' - false - elsif job_data_consistency == :delayed && not_yet_retried?(job) - job[:database_chosen] = 'retry' - raise JobReplicaNotUpToDate, "Sidekiq job #{worker_class} JID-#{job['jid']} couldn't use the replica."\ - " Replica was not up to date." + # Happy case: we can read from a replica. + retried_before?(worker_class, job) ? :replica_retried : :replica + elsif can_retry?(worker_class, job) + # Optimistic case: The worker allows retries and we have retries left. + :retry else - job[:database_chosen] = 'primary' - true + # Sad case: we need to fall back to the primary. + :primary end end + def load_balancing_available?(worker_class) + worker_class.include?(::ApplicationWorker) && + worker_class.utilizes_load_balancing_capabilities? && + worker_class.get_data_consistency_feature_flag_enabled? + end + + def can_retry?(worker_class, job) + worker_class.get_data_consistency == :delayed && not_yet_retried?(job) + end + + def retried_before?(worker_class, job) + worker_class.get_data_consistency == :delayed && !not_yet_retried?(job) + end + def not_yet_retried?(job) # if `retry_count` is `nil` it indicates that this job was never retried # the `0` indicates that this is a first retry @@ -59,11 +80,7 @@ module Gitlab end def replica_caught_up?(location) - if Feature.enabled?(:sidekiq_load_balancing_rotate_up_to_date_replica) - load_balancer.select_up_to_date_host(location) - else - load_balancer.host.caught_up?(location) - end + load_balancer.select_up_to_date_host(location) end end end diff --git a/lib/gitlab/database/load_balancing/srv_resolver.rb b/lib/gitlab/database/load_balancing/srv_resolver.rb index 20da525f4d2..1f599ef4a27 100644 --- a/lib/gitlab/database/load_balancing/srv_resolver.rb +++ b/lib/gitlab/database/load_balancing/srv_resolver.rb @@ -1,5 +1,7 @@ # frozen_string_literal: true +require 'net/dns' + module Gitlab module Database module LoadBalancing diff --git a/lib/gitlab/database/load_balancing/sticking.rb b/lib/gitlab/database/load_balancing/sticking.rb index efbd7099300..8e1aa079216 100644 --- a/lib/gitlab/database/load_balancing/sticking.rb +++ b/lib/gitlab/database/load_balancing/sticking.rb @@ -33,8 +33,10 @@ module Gitlab return true unless location - load_balancer.all_caught_up?(location).tap do |caught_up| - unstick(namespace, id) if caught_up + load_balancer.select_up_to_date_host(location).tap do |found| + ActiveSupport::Notifications.instrument('caught_up_replica_pick.load_balancing', { result: found } ) + + unstick(namespace, id) if found end end @@ -51,8 +53,14 @@ module Gitlab # write location. If no such location exists, err on the side of caution. return false unless location - load_balancer.select_caught_up_hosts(location).tap do |selected| - unstick(namespace, id) if selected + if ::Feature.enabled?(:load_balancing_refine_load_balancer_methods) + load_balancer.select_up_to_date_host(location).tap do |selected| + unstick(namespace, id) if selected + end + else + load_balancer.select_caught_up_hosts(location).tap do |selected| + unstick(namespace, id) if selected + end end end |