summaryrefslogtreecommitdiff
path: root/lib/gitlab/database/count/tablesample_count_strategy.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gitlab/database/count/tablesample_count_strategy.rb')
-rw-r--r--lib/gitlab/database/count/tablesample_count_strategy.rb66
1 files changed, 66 insertions, 0 deletions
diff --git a/lib/gitlab/database/count/tablesample_count_strategy.rb b/lib/gitlab/database/count/tablesample_count_strategy.rb
new file mode 100644
index 00000000000..cf1cf054dbf
--- /dev/null
+++ b/lib/gitlab/database/count/tablesample_count_strategy.rb
@@ -0,0 +1,66 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Count
+ # A tablesample count executes in two phases:
+ # * Estimate table sizes based on reltuples.
+ # * Based on the estimate:
+ # * If the table is considered 'small', execute an exact relation count.
+ # * Otherwise, count on a sample of the table using TABLESAMPLE.
+ #
+ # The size of the sample is chosen in a way that we always roughly scan
+ # the same amount of rows (see TABLESAMPLE_ROW_TARGET).
+ #
+ # There are no guarantees with respect to the accuracy of the result or runtime.
+ class TablesampleCountStrategy < ReltuplesCountStrategy
+ EXACT_COUNT_THRESHOLD = 10_000
+ TABLESAMPLE_ROW_TARGET = 10_000
+
+ def count
+ estimates = size_estimates(check_statistics: false)
+
+ models.each_with_object({}) do |model, count_by_model|
+ count = perform_count(model, estimates[model])
+ count_by_model[model] = count if count
+ end
+ rescue *CONNECTION_ERRORS
+ {}
+ end
+
+ def self.enabled?
+ Gitlab::Database.postgresql? && Feature.enabled?(:tablesample_counts)
+ end
+
+ private
+
+ def perform_count(model, estimate)
+ # If we estimate 0, we may not have statistics at all. Don't use them.
+ return nil unless estimate && estimate > 0
+
+ if estimate < EXACT_COUNT_THRESHOLD
+ # The table is considered small, the assumption here is that
+ # the exact count will be fast anyways.
+ model.count
+ else
+ # The table is considered large, let's only count on a sample.
+ tablesample_count(model, estimate)
+ end
+ end
+
+ def tablesample_count(model, estimate)
+ portion = (TABLESAMPLE_ROW_TARGET.to_f / estimate).round(4)
+ inverse = 1 / portion
+ query = <<~SQL
+ SELECT (COUNT(*)*#{inverse})::integer AS count
+ FROM #{model.table_name} TABLESAMPLE SYSTEM (#{portion * 100})
+ SQL
+
+ rows = ActiveRecord::Base.connection.select_all(query)
+
+ Integer(rows.first['count'])
+ end
+ end
+ end
+ end
+end