summaryrefslogtreecommitdiff
path: root/lib/gitlab/database/batch_count.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gitlab/database/batch_count.rb')
-rw-r--r--lib/gitlab/database/batch_count.rb25
1 files changed, 20 insertions, 5 deletions
diff --git a/lib/gitlab/database/batch_count.rb b/lib/gitlab/database/batch_count.rb
index 1762b81b7d8..0de67ed8cf0 100644
--- a/lib/gitlab/database/batch_count.rb
+++ b/lib/gitlab/database/batch_count.rb
@@ -8,15 +8,20 @@
# In order to not use a possible complex time consuming query when calculating min and max for batch_distinct_count
# the start and finish can be sent specifically
#
+# Grouped relations can be used as well. However, the preferred batch count should be around 10K because group by count is more expensive.
+#
# See https://gitlab.com/gitlab-org/gitlab/-/merge_requests/22705
#
# Examples:
# extend ::Gitlab::Database::BatchCount
# batch_count(User.active)
# batch_count(::Clusters::Cluster.aws_installed.enabled, :cluster_id)
+# batch_count(Namespace.group(:type))
# batch_distinct_count(::Project, :creator_id)
# batch_distinct_count(::Project.with_active_services.service_desk_enabled.where(time_period), start: ::User.minimum(:id), finish: ::User.maximum(:id))
+# batch_distinct_count(Project.group(:visibility_level), :creator_id)
# batch_sum(User, :sign_in_count)
+# batch_sum(Issue.group(:state_id), :weight))
module Gitlab
module Database
module BatchCount
@@ -77,12 +82,12 @@ module Gitlab
raise "Batch counting expects positive values only for #{@column}" if start < 0 || finish < 0
return FALLBACK if unwanted_configuration?(finish, batch_size, start)
- counter = 0
+ results = nil
batch_start = start
while batch_start <= finish
begin
- counter += batch_fetch(batch_start, batch_start + batch_size, mode)
+ results = merge_results(results, batch_fetch(batch_start, batch_start + batch_size, mode))
batch_start += batch_size
rescue ActiveRecord::QueryCanceled
# retry with a safe batch size & warmer cache
@@ -95,7 +100,17 @@ module Gitlab
sleep(SLEEP_TIME_IN_SECONDS)
end
- counter
+ results
+ end
+
+ def merge_results(results, object)
+ return object unless results
+
+ if object.is_a?(Hash)
+ results.merge!(object) { |_, a, b| a + b }
+ else
+ results + object
+ end
end
def batch_fetch(start, finish, mode)
@@ -118,11 +133,11 @@ module Gitlab
end
def actual_start(start)
- start || @relation.minimum(@column) || 0
+ start || @relation.unscope(:group, :having).minimum(@column) || 0
end
def actual_finish(finish)
- finish || @relation.maximum(@column) || 0
+ finish || @relation.unscope(:group, :having).maximum(@column) || 0
end
def check_mode!(mode)