summaryrefslogtreecommitdiff
path: root/lib/gitlab/database/count/reltuples_count_strategy.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/gitlab/database/count/reltuples_count_strategy.rb')
-rw-r--r--lib/gitlab/database/count/reltuples_count_strategy.rb79
1 files changed, 79 insertions, 0 deletions
diff --git a/lib/gitlab/database/count/reltuples_count_strategy.rb b/lib/gitlab/database/count/reltuples_count_strategy.rb
new file mode 100644
index 00000000000..c3a674aeb7e
--- /dev/null
+++ b/lib/gitlab/database/count/reltuples_count_strategy.rb
@@ -0,0 +1,79 @@
+# frozen_string_literal: true
+
+module Gitlab
+ module Database
+ module Count
+ class PgClass < ActiveRecord::Base
+ self.table_name = 'pg_class'
+ end
+
+ # This strategy counts based on PostgreSQL's statistics in pg_stat_user_tables.
+ #
+ # Specifically, it relies on the column reltuples in said table. An additional
+ # check is performed to make sure statistics were updated within the last hour.
+ #
+ # Otherwise, this strategy skips tables with outdated statistics.
+ #
+ # There are no guarantees with respect to the accuracy of this strategy. Runtime
+ # however is guaranteed to be "fast", because it only looks up statistics.
+ class ReltuplesCountStrategy
+ attr_reader :models
+ def initialize(models)
+ @models = models
+ end
+
+ # Returns a hash of the table names that have recently updated tuples.
+ #
+ # @returns [Hash] Table name to count mapping (e.g. { 'projects' => 5, 'users' => 100 })
+ def count
+ size_estimates
+ rescue *CONNECTION_ERRORS
+ {}
+ end
+
+ def self.enabled?
+ Gitlab::Database.postgresql?
+ end
+
+ private
+
+ def table_names
+ models.map(&:table_name)
+ end
+
+ def size_estimates(check_statistics: true)
+ table_to_model = models.each_with_object({}) { |model, h| h[model.table_name] = model }
+
+ # Querying tuple stats only works on the primary. Due to load balancing, the
+ # easiest way to do this is to start a transaction.
+ ActiveRecord::Base.transaction do
+ get_statistics(table_names, check_statistics: check_statistics).each_with_object({}) do |row, data|
+ model = table_to_model[row.table_name]
+ data[model] = row.estimate
+ end
+ end
+ end
+
+ # Generates the PostgreSQL query to return the tuples for tables
+ # that have been vacuumed or analyzed in the last hour.
+ #
+ # @param [Array] table names
+ # @returns [Hash] Table name to count mapping (e.g. { 'projects' => 5, 'users' => 100 })
+ def get_statistics(table_names, check_statistics: true)
+ time = 1.hour.ago
+
+ query = PgClass.joins("LEFT JOIN pg_stat_user_tables USING (relname)")
+ .where(relname: table_names)
+ .select('pg_class.relname AS table_name, reltuples::bigint AS estimate')
+
+ if check_statistics
+ query = query.where('last_vacuum > ? OR last_autovacuum > ? OR last_analyze > ? OR last_autoanalyze > ?',
+ time, time, time, time)
+ end
+
+ query
+ end
+ end
+ end
+ end
+end