diff options
author | Dylan Griffith <dyl.griffith@gmail.com> | 2018-03-21 10:03:50 +1100 |
---|---|---|
committer | Dylan Griffith <dyl.griffith@gmail.com> | 2018-03-21 10:03:52 +1100 |
commit | 392e4ee3bbd2b57bc79a025e13cd0dac11653d89 (patch) | |
tree | 205f904e48a1c804e141c95eac99d647bcbfbe8a | |
parent | 5778de6720d777abbaea00809b7e0f92d73cbdb7 (diff) | |
download | gitlab-ce-41059-calculate-artifact-size-more-efficiently.tar.gz |
Update ProjectStatistics#build_artifacts_size synchronously without summing (#41059)41059-calculate-artifact-size-more-efficiently
Previously we scheduled a worker to just some this but we were running
into performance issues when the build table was getting too large. So
now we've updated the code such that this column is updated immediately
and incremented/decremented by the correct amount whenever artifacts are
created or deleted.
We've also added the performance optimization that we do not update this
statistic if a project is deleted because it could result in many
updates for a project with many builds.
-rw-r--r-- | app/models/ci/build.rb | 31 | ||||
-rw-r--r-- | app/models/ci/job_artifact.rb | 22 | ||||
-rw-r--r-- | app/models/project_statistics.rb | 13 | ||||
-rw-r--r-- | spec/models/ci/build_spec.rb | 42 | ||||
-rw-r--r-- | spec/models/ci/job_artifact_spec.rb | 39 | ||||
-rw-r--r-- | spec/models/project_statistics_spec.rb | 57 |
6 files changed, 105 insertions, 99 deletions
diff --git a/app/models/ci/build.rb b/app/models/ci/build.rb index c1da2081465..9bb162834dd 100644 --- a/app/models/ci/build.rb +++ b/app/models/ci/build.rb @@ -18,7 +18,7 @@ module Ci has_one :last_deployment, -> { order('deployments.id DESC') }, as: :deployable, class_name: 'Deployment' has_many :trace_sections, class_name: 'Ci::BuildTraceSection' - has_many :job_artifacts, class_name: 'Ci::JobArtifact', foreign_key: :job_id, dependent: :destroy # rubocop:disable Cop/ActiveRecordDependent + has_many :job_artifacts, class_name: 'Ci::JobArtifact', foreign_key: :job_id, dependent: :destroy, inverse_of: :job # rubocop:disable Cop/ActiveRecordDependent has_one :job_artifacts_archive, -> { where(file_type: Ci::JobArtifact.file_types[:archive]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id has_one :job_artifacts_metadata, -> { where(file_type: Ci::JobArtifact.file_types[:metadata]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id has_one :job_artifacts_trace, -> { where(file_type: Ci::JobArtifact.file_types[:trace]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id @@ -85,8 +85,8 @@ module Ci run_after_commit { BuildHooksWorker.perform_async(build.id) } end - after_commit :update_project_statistics_after_save, on: [:create, :update] - after_commit :update_project_statistics, on: :destroy + after_save :update_project_statistics_after_save + after_destroy :update_project_statistics_after_destroy class << self # This is needed for url_for to work, @@ -603,16 +603,25 @@ module Ci pipeline.config_processor.build_attributes(name) end - def update_project_statistics - return unless project - - ProjectCacheWorker.perform_async(project_id, [], [:build_artifacts_size]) - end - def update_project_statistics_after_save - if previous_changes.include?('artifacts_size') - update_project_statistics + if changes.include?(:artifacts_size) + old_size = changes[:artifacts_size][0] || 0 + new_size = artifacts_size || 0 + ProjectStatistics.update_counters( + project.statistics.id, + build_artifacts_size: new_size - old_size + ) end end + + def update_project_statistics_after_destroy + # No need to update statistics if the project is being destroyed + return if destroyed_by_association&.active_record == Project + + ProjectStatistics.update_counters( + project.statistics.id, + build_artifacts_size: -artifacts_size + ) + end end end diff --git a/app/models/ci/job_artifact.rb b/app/models/ci/job_artifact.rb index 0a599f72bc7..366e4715bda 100644 --- a/app/models/ci/job_artifact.rb +++ b/app/models/ci/job_artifact.rb @@ -5,7 +5,8 @@ module Ci belongs_to :project belongs_to :job, class_name: "Ci::Build", foreign_key: :job_id - before_save :set_size, if: :file_changed? + before_create :set_size + after_destroy :update_project_statistics_after_destroy mount_uploader :file, JobArtifactUploader @@ -17,12 +18,23 @@ module Ci trace: 3 } - def self.artifacts_size_for(project) - self.where(project: project).sum(:size) + def set_size + new_size = file.size + ProjectStatistics.update_counters( + project.statistics.id, + build_artifacts_size: new_size + ) + self.size = new_size end - def set_size - self.size = file.size + def update_project_statistics_after_destroy + # No need to update statistics if the project is being destroyed + return if destroyed_by_association && job.destroyed_by_association.active_record == Project + + ProjectStatistics.update_counters( + project.statistics.id, + build_artifacts_size: -self.size + ) end def expire_in diff --git a/app/models/project_statistics.rb b/app/models/project_statistics.rb index 87a4350f022..fd3749f7e6c 100644 --- a/app/models/project_statistics.rb +++ b/app/models/project_statistics.rb @@ -4,15 +4,14 @@ class ProjectStatistics < ActiveRecord::Base before_save :update_storage_size - STORAGE_COLUMNS = [:repository_size, :lfs_objects_size, :build_artifacts_size].freeze - STATISTICS_COLUMNS = [:commit_count] + STORAGE_COLUMNS + COLUMNS_TO_REFRESH = [:repository_size, :lfs_objects_size, :commit_count].freeze def total_repository_size repository_size + lfs_objects_size end def refresh!(only: nil) - STATISTICS_COLUMNS.each do |column, generator| + COLUMNS_TO_REFRESH.each do |column, generator| if only.blank? || only.include?(column) public_send("update_#{column}") # rubocop:disable GitlabSecurity/PublicSend end @@ -34,13 +33,7 @@ class ProjectStatistics < ActiveRecord::Base self.lfs_objects_size = project.lfs_objects.sum(:size) end - def update_build_artifacts_size - self.build_artifacts_size = - project.builds.sum(:artifacts_size) + - Ci::JobArtifact.artifacts_size_for(self.project) - end - def update_storage_size - self.storage_size = STORAGE_COLUMNS.sum(&method(:read_attribute)) + self.storage_size = repository_size + lfs_objects_size + build_artifacts_size end end diff --git a/spec/models/ci/build_spec.rb b/spec/models/ci/build_spec.rb index 9da3de7a828..fdfc2ee2b30 100644 --- a/spec/models/ci/build_spec.rb +++ b/spec/models/ci/build_spec.rb @@ -1368,30 +1368,46 @@ describe Ci::Build do end end - describe '#update_project_statistics' do - let!(:build) { create(:ci_build, artifacts_size: 23) } - - it 'updates project statistics when the artifact size changes' do - expect(ProjectCacheWorker).to receive(:perform_async) - .with(build.project_id, [], [:build_artifacts_size]) + context 'when updating the build' do + let(:build) { create(:ci_build, artifacts_size: 23) } + it 'updates project statistics' do build.artifacts_size = 42 + + expect(ProjectStatistics).to receive(:update_counters) + .with(build.project.statistics.id, build_artifacts_size: 19) + build.save! end - it 'does not update project statistics when the artifact size stays the same' do - expect(ProjectCacheWorker).not_to receive(:perform_async) + context 'when the artifact size stays the same' do + it 'does not update project statistics' do + build.name = 'changed' - build.name = 'changed' - build.save! + expect(ProjectStatistics).not_to receive(:update_counters) + + build.save! + end end + end + + context 'when destroying the build' do + let!(:build) { create(:ci_build, artifacts_size: 23) } - it 'updates project statistics when the build is destroyed' do - expect(ProjectCacheWorker).to receive(:perform_async) - .with(build.project_id, [], [:build_artifacts_size]) + it 'updates project statistics' do + expect(ProjectStatistics).to receive(:update_counters) + .with(build.project.statistics.id, build_artifacts_size: -23) build.destroy end + + context 'when the build is destroyed due to the project being destroyed' do + it 'does not update the project statistics' do + expect(ProjectStatistics).not_to receive(:update_counters) + + build.project.destroy + end + end end describe '#when' do diff --git a/spec/models/ci/job_artifact_spec.rb b/spec/models/ci/job_artifact_spec.rb index a2bd36537e6..428f20ded9a 100644 --- a/spec/models/ci/job_artifact_spec.rb +++ b/spec/models/ci/job_artifact_spec.rb @@ -1,7 +1,7 @@ require 'spec_helper' describe Ci::JobArtifact do - set(:artifact) { create(:ci_job_artifact, :archive) } + let(:artifact) { create(:ci_job_artifact, :archive) } describe "Associations" do it { is_expected.to belong_to(:project) } @@ -15,10 +15,19 @@ describe Ci::JobArtifact do it { is_expected.to delegate_method(:open).to(:file) } it { is_expected.to delegate_method(:exists?).to(:file) } - describe '#set_size' do - it 'sets the size' do + before do + allow(ProjectStatistics).to receive(:update_counters) + end + + context 'creating the artifact' do + it 'sets the size from the file size' do expect(artifact.size).to eq(106365) end + + it 'updates the project statistics' do + expect(ProjectStatistics).to have_received(:update_counters) + .with(artifact.project.statistics.id, build_artifacts_size: 106365) + end end describe '#file' do @@ -74,4 +83,28 @@ describe Ci::JobArtifact do is_expected.to be_nil end end + + context 'when destroying the artifact' do + let!(:project) { create(:project, :repository) } + let(:pipeline) { create(:ci_pipeline, project: project) } + let!(:build) { create(:ci_build, :artifacts, pipeline: pipeline) } + + it 'updates the project statistics' do + artifact = build.job_artifacts.first + + artifact.destroy + + expect(ProjectStatistics).to have_received(:update_counters) + .with(project.statistics.id, build_artifacts_size: -106365) + end + + context 'when it is destroyed from the project level' do + it 'does not update the project statistics' do + project.destroy + + expect(ProjectStatistics).not_to have_received(:update_counters) + .with(project.statistics.id, build_artifacts_size: -106365) + end + end + end end diff --git a/spec/models/project_statistics_spec.rb b/spec/models/project_statistics_spec.rb index 5cff2af4aca..842162592a6 100644 --- a/spec/models/project_statistics_spec.rb +++ b/spec/models/project_statistics_spec.rb @@ -4,26 +4,6 @@ describe ProjectStatistics do let(:project) { create :project } let(:statistics) { project.statistics } - describe 'constants' do - describe 'STORAGE_COLUMNS' do - it 'is an array of symbols' do - expect(described_class::STORAGE_COLUMNS).to be_kind_of Array - expect(described_class::STORAGE_COLUMNS.map(&:class).uniq).to eq [Symbol] - end - end - - describe 'STATISTICS_COLUMNS' do - it 'is an array of symbols' do - expect(described_class::STATISTICS_COLUMNS).to be_kind_of Array - expect(described_class::STATISTICS_COLUMNS.map(&:class).uniq).to eq [Symbol] - end - - it 'includes all storage columns' do - expect(described_class::STATISTICS_COLUMNS & described_class::STORAGE_COLUMNS).to eq described_class::STORAGE_COLUMNS - end - end - end - describe 'associations' do it { is_expected.to belong_to(:project) } it { is_expected.to belong_to(:namespace) } @@ -63,7 +43,6 @@ describe ProjectStatistics do allow(statistics).to receive(:update_commit_count) allow(statistics).to receive(:update_repository_size) allow(statistics).to receive(:update_lfs_objects_size) - allow(statistics).to receive(:update_build_artifacts_size) allow(statistics).to receive(:update_storage_size) end @@ -76,7 +55,6 @@ describe ProjectStatistics do expect(statistics).to have_received(:update_commit_count) expect(statistics).to have_received(:update_repository_size) expect(statistics).to have_received(:update_lfs_objects_size) - expect(statistics).to have_received(:update_build_artifacts_size) end end @@ -89,7 +67,6 @@ describe ProjectStatistics do expect(statistics).to have_received(:update_lfs_objects_size) expect(statistics).not_to have_received(:update_commit_count) expect(statistics).not_to have_received(:update_repository_size) - expect(statistics).not_to have_received(:update_build_artifacts_size) end end end @@ -131,40 +108,6 @@ describe ProjectStatistics do end end - describe '#update_build_artifacts_size' do - let!(:pipeline) { create(:ci_pipeline, project: project) } - - context 'when new job artifacts are calculated' do - let(:ci_build) { create(:ci_build, pipeline: pipeline) } - - before do - create(:ci_job_artifact, :archive, project: pipeline.project, job: ci_build) - end - - it "stores the size of related build artifacts" do - statistics.update_build_artifacts_size - - expect(statistics.build_artifacts_size).to be(106365) - end - - it 'calculates related build artifacts by project' do - expect(Ci::JobArtifact).to receive(:artifacts_size_for).with(project) { 0 } - - statistics.update_build_artifacts_size - end - end - - context 'when legacy artifacts are used' do - let!(:ci_build) { create(:ci_build, pipeline: pipeline, artifacts_size: 10.megabytes) } - - it "stores the size of related build artifacts" do - statistics.update_build_artifacts_size - - expect(statistics.build_artifacts_size).to eq(10.megabytes) - end - end - end - describe '#update_storage_size' do it "sums all storage counters" do statistics.update!( |