diff options
Diffstat (limited to 'lib/gitlab/database/count/tablesample_count_strategy.rb')
-rw-r--r-- | lib/gitlab/database/count/tablesample_count_strategy.rb | 66 |
1 files changed, 66 insertions, 0 deletions
diff --git a/lib/gitlab/database/count/tablesample_count_strategy.rb b/lib/gitlab/database/count/tablesample_count_strategy.rb new file mode 100644 index 00000000000..cf1cf054dbf --- /dev/null +++ b/lib/gitlab/database/count/tablesample_count_strategy.rb @@ -0,0 +1,66 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Count + # A tablesample count executes in two phases: + # * Estimate table sizes based on reltuples. + # * Based on the estimate: + # * If the table is considered 'small', execute an exact relation count. + # * Otherwise, count on a sample of the table using TABLESAMPLE. + # + # The size of the sample is chosen in a way that we always roughly scan + # the same amount of rows (see TABLESAMPLE_ROW_TARGET). + # + # There are no guarantees with respect to the accuracy of the result or runtime. + class TablesampleCountStrategy < ReltuplesCountStrategy + EXACT_COUNT_THRESHOLD = 10_000 + TABLESAMPLE_ROW_TARGET = 10_000 + + def count + estimates = size_estimates(check_statistics: false) + + models.each_with_object({}) do |model, count_by_model| + count = perform_count(model, estimates[model]) + count_by_model[model] = count if count + end + rescue *CONNECTION_ERRORS + {} + end + + def self.enabled? + Gitlab::Database.postgresql? && Feature.enabled?(:tablesample_counts) + end + + private + + def perform_count(model, estimate) + # If we estimate 0, we may not have statistics at all. Don't use them. + return nil unless estimate && estimate > 0 + + if estimate < EXACT_COUNT_THRESHOLD + # The table is considered small, the assumption here is that + # the exact count will be fast anyways. + model.count + else + # The table is considered large, let's only count on a sample. + tablesample_count(model, estimate) + end + end + + def tablesample_count(model, estimate) + portion = (TABLESAMPLE_ROW_TARGET.to_f / estimate).round(4) + inverse = 1 / portion + query = <<~SQL + SELECT (COUNT(*)*#{inverse})::integer AS count + FROM #{model.table_name} TABLESAMPLE SYSTEM (#{portion * 100}) + SQL + + rows = ActiveRecord::Base.connection.select_all(query) + + Integer(rows.first['count']) + end + end + end + end +end |