diff options
Diffstat (limited to 'lib/gitlab/database/count/reltuples_count_strategy.rb')
-rw-r--r-- | lib/gitlab/database/count/reltuples_count_strategy.rb | 79 |
1 files changed, 79 insertions, 0 deletions
diff --git a/lib/gitlab/database/count/reltuples_count_strategy.rb b/lib/gitlab/database/count/reltuples_count_strategy.rb new file mode 100644 index 00000000000..c3a674aeb7e --- /dev/null +++ b/lib/gitlab/database/count/reltuples_count_strategy.rb @@ -0,0 +1,79 @@ +# frozen_string_literal: true + +module Gitlab + module Database + module Count + class PgClass < ActiveRecord::Base + self.table_name = 'pg_class' + end + + # This strategy counts based on PostgreSQL's statistics in pg_stat_user_tables. + # + # Specifically, it relies on the column reltuples in said table. An additional + # check is performed to make sure statistics were updated within the last hour. + # + # Otherwise, this strategy skips tables with outdated statistics. + # + # There are no guarantees with respect to the accuracy of this strategy. Runtime + # however is guaranteed to be "fast", because it only looks up statistics. + class ReltuplesCountStrategy + attr_reader :models + def initialize(models) + @models = models + end + + # Returns a hash of the table names that have recently updated tuples. + # + # @returns [Hash] Table name to count mapping (e.g. { 'projects' => 5, 'users' => 100 }) + def count + size_estimates + rescue *CONNECTION_ERRORS + {} + end + + def self.enabled? + Gitlab::Database.postgresql? + end + + private + + def table_names + models.map(&:table_name) + end + + def size_estimates(check_statistics: true) + table_to_model = models.each_with_object({}) { |model, h| h[model.table_name] = model } + + # Querying tuple stats only works on the primary. Due to load balancing, the + # easiest way to do this is to start a transaction. + ActiveRecord::Base.transaction do + get_statistics(table_names, check_statistics: check_statistics).each_with_object({}) do |row, data| + model = table_to_model[row.table_name] + data[model] = row.estimate + end + end + end + + # Generates the PostgreSQL query to return the tuples for tables + # that have been vacuumed or analyzed in the last hour. + # + # @param [Array] table names + # @returns [Hash] Table name to count mapping (e.g. { 'projects' => 5, 'users' => 100 }) + def get_statistics(table_names, check_statistics: true) + time = 1.hour.ago + + query = PgClass.joins("LEFT JOIN pg_stat_user_tables USING (relname)") + .where(relname: table_names) + .select('pg_class.relname AS table_name, reltuples::bigint AS estimate') + + if check_statistics + query = query.where('last_vacuum > ? OR last_autovacuum > ? OR last_analyze > ? OR last_autoanalyze > ?', + time, time, time, time) + end + + query + end + end + end + end +end |