diff options
Diffstat (limited to 'lib/gitlab/database/count.rb')
-rw-r--r-- | lib/gitlab/database/count.rb | 72 |
1 files changed, 55 insertions, 17 deletions
diff --git a/lib/gitlab/database/count.rb b/lib/gitlab/database/count.rb index 3374203960e..5f549ed2b3c 100644 --- a/lib/gitlab/database/count.rb +++ b/lib/gitlab/database/count.rb @@ -17,31 +17,69 @@ module Gitlab ].freeze end - def self.approximate_count(model) - return model.count unless Gitlab::Database.postgresql? + # Takes in an array of models and returns a Hash for the approximate + # counts for them. If the model's table has not been vacuumed or + # analyzed recently, simply run the Model.count to get the data. + # + # @param [Array] + # @return [Hash] of Model -> count mapping + def self.approximate_counts(models) + table_to_model_map = models.each_with_object({}) do |model, hash| + hash[model.table_name] = model + end - execute_estimate_if_updated_recently(model) || model.count - end + table_names = table_to_model_map.keys + counts_by_table_name = Gitlab::Database.postgresql? ? reltuples_from_recently_updated(table_names) : {} - def self.execute_estimate_if_updated_recently(model) - ActiveRecord::Base.connection.select_value(postgresql_estimate_query(model)).to_i if reltuples_updated_recently?(model) - rescue *CONNECTION_ERRORS + # Convert table -> count to Model -> count + counts_by_model = counts_by_table_name.each_with_object({}) do |pair, hash| + model = table_to_model_map[pair.first] + hash[model] = pair.second + end + + missing_tables = table_names - counts_by_table_name.keys + + missing_tables.each do |table| + model = table_to_model_map[table] + counts_by_model[model] = model.count + end + + counts_by_model end - def self.reltuples_updated_recently?(model) - time = "to_timestamp(#{1.hour.ago.to_i})" - query = <<~SQL - SELECT 1 FROM pg_stat_user_tables WHERE relname = '#{model.table_name}' AND - (last_vacuum > #{time} OR last_autovacuum > #{time} OR last_analyze > #{time} OR last_autoanalyze > #{time}) - SQL + # Returns a hash of the table names that have recently updated tuples. + # + # @param [Array] table names + # @returns [Hash] Table name to count mapping (e.g. { 'projects' => 5, 'users' => 100 }) + def self.reltuples_from_recently_updated(table_names) + query = postgresql_estimate_query(table_names) + rows = [] - ActiveRecord::Base.connection.select_all(query).count > 0 + # Querying tuple stats only works on the primary. Due to load + # balancing, we need to ensure this query hits the load balancer. The + # easiest way to do this is to start a transaction. + ActiveRecord::Base.transaction do + rows = ActiveRecord::Base.connection.select_all(query) + end + + rows.each_with_object({}) { |row, data| data[row['table_name']] = row['estimate'].to_i } rescue *CONNECTION_ERRORS - false + {} end - def self.postgresql_estimate_query(model) - "SELECT reltuples::bigint AS estimate FROM pg_class where relname = '#{model.table_name}'" + # Generates the PostgreSQL query to return the tuples for tables + # that have been vacuumed or analyzed in the last hour. + # + # @param [Array] table names + # @returns [Hash] Table name to count mapping (e.g. { 'projects' => 5, 'users' => 100 }) + def self.postgresql_estimate_query(table_names) + time = "to_timestamp(#{1.hour.ago.to_i})" + <<~SQL + SELECT pg_class.relname AS table_name, reltuples::bigint AS estimate FROM pg_class + LEFT JOIN pg_stat_user_tables ON pg_class.relname = pg_stat_user_tables.relname + WHERE pg_class.relname IN (#{table_names.map { |table| "'#{table}'" }.join(',')}) + AND (last_vacuum > #{time} OR last_autovacuum > #{time} OR last_analyze > #{time} OR last_autoanalyze > #{time}) + SQL end end end |