summaryrefslogtreecommitdiff
path: root/lib/gitlab/database/count/tablesample_count_strategy.rb
blob: 7777f31f702d1012789331fd7fdb18d2316cfc4f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# frozen_string_literal: true

module Gitlab
  module Database
    module Count
      # A tablesample count executes in two phases:
      # * Estimate table sizes based on reltuples.
      # * Based on the estimate:
      #   * If the table is considered 'small', execute an exact relation count.
      #   * Otherwise, count on a sample of the table using TABLESAMPLE.
      #
      # The size of the sample is chosen in a way that we always roughly scan
      # the same amount of rows (see TABLESAMPLE_ROW_TARGET).
      #
      # There are no guarantees with respect to the accuracy of the result or runtime.
      class TablesampleCountStrategy < ReltuplesCountStrategy
        EXACT_COUNT_THRESHOLD = 10_000
        TABLESAMPLE_ROW_TARGET = 10_000

        def count
          estimates = size_estimates(check_statistics: false)

          models.each_with_object({}) do |model, count_by_model|
            count = perform_count(model, estimates[model])
            count_by_model[model] = count if count
          end
        rescue *CONNECTION_ERRORS
          {}
        end

        def self.enabled?
          Gitlab::Database.postgresql? && Feature.enabled?(:tablesample_counts)
        end

        private

        def perform_count(model, estimate)
          # If we estimate 0, we may not have statistics at all. Don't use them.
          return unless estimate && estimate > 0

          if estimate < EXACT_COUNT_THRESHOLD
            # The table is considered small, the assumption here is that
            # the exact count will be fast anyways.
            model.count
          else
            # The table is considered large, let's only count on a sample.
            tablesample_count(model, estimate)
          end
        end

        def where_clause(model)
          return unless sti_model?(model)

          "WHERE #{model.inheritance_column} = '#{model.name}'"
        end

        def tablesample_count(model, estimate)
          portion = (TABLESAMPLE_ROW_TARGET.to_f / estimate).round(4)
          inverse = 1 / portion
          query = <<~SQL
            SELECT (COUNT(*)*#{inverse})::integer AS count
            FROM #{model.table_name}
            TABLESAMPLE SYSTEM (#{portion * 100})
            REPEATABLE (0)
            #{where_clause(model)}
          SQL

          rows = ActiveRecord::Base.connection.select_all(query)

          Integer(rows.first['count'])
        end
      end
    end
  end
end