app/models/project_statistics.rb


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163

# frozen_string_literal: true

# Per-project size and activity counters (repository, wiki, LFS objects,
# build/pipeline artifacts, packages, snippets, uploads, commit count) plus the
# aggregated `storage_size` that is recomputed from them before every save.
class ProjectStatistics < ApplicationRecord
  include AfterCommitQueue
  include CounterAttribute

  belongs_to :project
  belongs_to :namespace

  default_value_for :wiki_size, 0
  default_value_for :snippets_size, 0

  counter_attribute :build_artifacts_size
  counter_attribute :storage_size

  # After counter attributes are flushed, schedule the namespace aggregation
  # for the namespace this row belongs to.
  counter_attribute_after_flush do |project_statistic|
    Namespaces::ScheduleAggregationWorker.perform_async(project_statistic.namespace_id)
  end

  before_save :update_storage_size

  # Columns recomputed by `refresh!`; each one needs a matching `update_<column>` method.
  COLUMNS_TO_REFRESH = [:repository_size, :wiki_size, :lfs_objects_size, :commit_count, :snippets_size, :uploads_size].freeze

  # Columns accepted by `increment_statistic`, mapped to the additional
  # summary columns that must be incremented by the same amount.
  INCREMENTABLE_COLUMNS = {
    build_artifacts_size: %i[storage_size],
    packages_size: %i[storage_size],
    pipeline_artifacts_size: %i[storage_size],
    snippets_size: %i[storage_size]
  }.freeze

  # Refreshing any of these columns also schedules the namespace aggregation worker.
  NAMESPACE_RELATABLE_COLUMNS = [:repository_size, :wiki_size, :lfs_objects_size, :uploads_size].freeze

  scope :for_project_ids, ->(project_ids) { where(project_id: project_ids) }

  scope :for_namespaces, ->(namespaces) { where(namespace: namespaces) }

  # @return [Integer] sum of `repository_size` and `lfs_objects_size`
  def total_repository_size
    repository_size + lfs_objects_size
  end

  # Recomputes the refreshable columns and saves the record.
  #
  # @param only [Array<Symbol>] subset of COLUMNS_TO_REFRESH to recompute;
  #   an empty array (the default) recomputes every column.
  # @return [nil] without doing anything when the database is read-only;
  #   otherwise the result of `save!`
  def refresh!(only: [])
    return if Gitlab::Database.read_only?

    refresh_all = only.empty?

    COLUMNS_TO_REFRESH.each do |column|
      next unless refresh_all || only.include?(column)

      public_send("update_#{column}") # rubocop:disable GitlabSecurity/PublicSend
    end

    if refresh_all || only.any? { |column| NAMESPACE_RELATABLE_COLUMNS.include?(column) }
      schedule_namespace_aggregation_worker
    end

    save!
  end

  # Assigns `commit_count` from the project's repository (does not save).
  def update_commit_count
    self.commit_count = project.repository.commit_count
  end

  # Assigns `repository_size` from the repository size scaled by 1.megabyte
  # (does not save). Presumably `repository.size` is reported in megabytes -
  # TODO confirm.
  def update_repository_size
    self.repository_size = project.repository.size * 1.megabyte
  end

  # Assigns `wiki_size` from the wiki repository size scaled by 1.megabyte
  # (does not save).
  def update_wiki_size
    self.wiki_size = project.wiki.repository.size * 1.megabyte
  end

  # Assigns `snippets_size` as the sum of `repository_size` over the project's
  # snippets that have statistics (does not save).
  def update_snippets_size
    self.snippets_size = project.snippets.with_statistics.sum(:repository_size)
  end

  # Assigns `lfs_objects_size` as the sum of the project's LFS object sizes
  # (does not save).
  def update_lfs_objects_size
    self.lfs_objects_size = project.lfs_objects.sum(:size)
  end

  # Assigns `uploads_size` as the sum of the project's upload sizes (does not
  # save). When the `count_uploads_size_in_storage_stats` feature flag is
  # disabled for the project, the current value is returned unchanged.
  def update_uploads_size
    return uploads_size unless Feature.enabled?(:count_uploads_size_in_storage_stats, project)

    self.uploads_size = project.uploads.sum(:size)
  end

  # `wiki_size` and `snippets_size` have no default value in the database
  # and the columns can be nil.
  # This means that, when the columns were added, all rows had nil
  # values in them.
  # Therefore, any call to any of those methods would return nil instead
  # of 0, because `default_value_for` works with new records, not existing ones.
  #
  # These two methods provide consistency and avoid returning nil.
  def wiki_size
    super.to_i
  end

  def snippets_size
    super.to_i
  end

  # `before_save` callback: recomputes `storage_size` as the sum of the
  # individual size columns.
  def update_storage_size
    # Named `total` so the local does not shadow the `storage_size` attribute reader.
    total = repository_size + wiki_size + lfs_objects_size + build_artifacts_size + packages_size

    # The `snippets_size` column was added on 20200622095419 but db/post_migrate/20190527194900_schedule_calculate_wiki_sizes.rb
    # might try to update project statistics before the `snippets_size` column has been created.
    total += snippets_size if self.class.column_names.include?('snippets_size')

    # The `pipeline_artifacts_size` column was added on 20200817142800 but db/post_migrate/20190527194900_schedule_calculate_wiki_sizes.rb
    # might try to update project statistics before the `pipeline_artifacts_size` column has been created.
    total += pipeline_artifacts_size if self.class.column_names.include?('pipeline_artifacts_size')

    # The `uploads_size` column was added on 20201105021637 but db/post_migrate/20190527194900_schedule_calculate_wiki_sizes.rb
    # might try to update project statistics before the `uploads_size` column has been created.
    total += uploads_size if self.class.column_names.include?('uploads_size')

    self.storage_size = total
  end

  # Since this incremental update method does not call update_storage_size above,
  # we have to update the storage_size here as an additional column.
  # Additional columns are updated depending on key => [columns], which allows
  # updating statistics that are, and also those that aren't, included in storage_size
  # or any other additional summary column in the future.
  #
  # @param project [Project] project whose statistics row is incremented;
  #   nothing happens when `project.statistics` is nil
  # @param key [Symbol] one of the keys of INCREMENTABLE_COLUMNS
  # @param amount [Integer] delta to apply; 0 is a no-op
  # @raise [ArgumentError] when `key` is not an incrementable column
  def self.increment_statistic(project, key, amount)
    raise ArgumentError, "Cannot increment attribute: #{key}" unless INCREMENTABLE_COLUMNS.key?(key)
    return if amount == 0

    project.statistics.try do |project_statistics|
      if project_statistics.counter_attribute_enabled?(key)
        # Increment the column itself and every summary column derived from it.
        [key, *INCREMENTABLE_COLUMNS[key]].each do |statistic|
          project_statistics.delayed_increment_counter(statistic, amount)
        end
      else
        legacy_increment_statistic(project, key, amount)
      end
    end
  end

  # Increments the columns directly in the database for the project's row and
  # then schedules the namespace aggregation worker.
  def self.legacy_increment_statistic(project, key, amount)
    where(project_id: project.id).columns_to_increment(key, amount)

    Namespaces::ScheduleAggregationWorker.perform_async( # rubocop: disable CodeReuse/Worker
      project.namespace_id)
  end

  # Issues a single `update_all` on the current scope that adds `amount` to
  # `key` and to every additional column listed for it in INCREMENTABLE_COLUMNS,
  # treating NULL as 0.
  #
  # NOTE(review): `key` and `amount` are interpolated into the SQL fragment
  # as-is. The caller in this class only reaches here after `key` was checked
  # against INCREMENTABLE_COLUMNS; any other caller must pass trusted values.
  def self.columns_to_increment(key, amount)
    updates = [key, *INCREMENTABLE_COLUMNS[key]].map do |column|
      "#{column} = COALESCE(#{column}, 0) + (#{amount})"
    end

    update_all(updates.join(', '))
  end

  private

  # Schedules the namespace aggregation worker once the surrounding
  # transaction has committed.
  def schedule_namespace_aggregation_worker
    run_after_commit do
      Namespaces::ScheduleAggregationWorker.perform_async(project.namespace_id)
    end
  end
end

# Hook for the EE extension: asks `prepend_if_ee` (defined elsewhere) to prepend
# the `EE::ProjectStatistics` module; presumably a no-op when EE is unavailable.
ProjectStatistics.prepend_if_ee('EE::ProjectStatistics')