summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDylan Griffith <dyl.griffith@gmail.com>2018-03-21 10:03:50 +1100
committerDylan Griffith <dyl.griffith@gmail.com>2018-04-19 18:35:40 +1000
commit03b020f2e4a4e48dc4ddceb1656b84aaa7938149 (patch)
tree41ec5d9630fa774c043d366b037ba35c5a9a7ab2
parent947ed8b88281a618530ae30bc0fe1e3d61a3c1d9 (diff)
downloadgitlab-ce-03b020f2e4a4e48dc4ddceb1656b84aaa7938149.tar.gz
Update ProjectStatistics#build_artifacts_size synchronously without summing (#41059)
Previously we scheduled a worker to just some this but we were running into performance issues when the build table was getting too large. So now we've updated the code such that this column is updated immediately and incremented/decremented by the correct amount whenever artifacts are created or deleted. We've also added the performance optimization that we do not update this statistic if a project is deleted because it could result in many updates for a project with many builds.
-rw-r--r--app/models/ci/build.rb24
-rw-r--r--app/models/ci/job_artifact.rb29
-rw-r--r--app/models/project_statistics.rb20
-rw-r--r--app/uploaders/job_artifact_uploader.rb8
-rw-r--r--changelogs/unreleased/41059-calculate-artifact-size-more-efficiently.yml5
-rw-r--r--spec/models/ci/build_spec.rb52
-rw-r--r--spec/models/ci/job_artifact_spec.rb56
-rw-r--r--spec/models/project_statistics_spec.rb80
8 files changed, 172 insertions, 102 deletions
diff --git a/app/models/ci/build.rb b/app/models/ci/build.rb
index 4aa65bf4273..79408f69083 100644
--- a/app/models/ci/build.rb
+++ b/app/models/ci/build.rb
@@ -20,7 +20,7 @@ module Ci
has_one :last_deployment, -> { order('deployments.id DESC') }, as: :deployable, class_name: 'Deployment'
has_many :trace_sections, class_name: 'Ci::BuildTraceSection'
- has_many :job_artifacts, class_name: 'Ci::JobArtifact', foreign_key: :job_id, dependent: :destroy # rubocop:disable Cop/ActiveRecordDependent
+ has_many :job_artifacts, class_name: 'Ci::JobArtifact', foreign_key: :job_id, dependent: :destroy, inverse_of: :job # rubocop:disable Cop/ActiveRecordDependent
has_one :job_artifacts_archive, -> { where(file_type: Ci::JobArtifact.file_types[:archive]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id
has_one :job_artifacts_metadata, -> { where(file_type: Ci::JobArtifact.file_types[:metadata]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id
has_one :job_artifacts_trace, -> { where(file_type: Ci::JobArtifact.file_types[:trace]) }, class_name: 'Ci::JobArtifact', inverse_of: :job, foreign_key: :job_id
@@ -95,8 +95,8 @@ module Ci
run_after_commit { BuildHooksWorker.perform_async(build.id) }
end
- after_commit :update_project_statistics_after_save, on: [:create, :update]
- after_commit :update_project_statistics, on: :destroy
+ after_save :update_project_statistics_after_save, if: :artifacts_size_changed?
+ after_destroy :update_project_statistics_after_destroy, unless: :project_destroyed?
class << self
# This is needed for url_for to work,
@@ -664,16 +664,20 @@ module Ci
pipeline.config_processor.build_attributes(name)
end
- def update_project_statistics
- return unless project
+ def update_project_statistics_after_save
+ update_project_statistics(read_attribute(:artifacts_size).to_i - artifacts_size_was.to_i)
+ end
- ProjectCacheWorker.perform_async(project_id, [], [:build_artifacts_size])
+ def update_project_statistics_after_destroy
+ update_project_statistics(-artifacts_size)
end
- def update_project_statistics_after_save
- if previous_changes.include?('artifacts_size')
- update_project_statistics
- end
+ def update_project_statistics(difference)
+ ProjectStatistics.increment_statistic(project_id, :build_artifacts_size, difference)
+ end
+
+ def project_destroyed?
+ project.pending_delete?
end
end
end
diff --git a/app/models/ci/job_artifact.rb b/app/models/ci/job_artifact.rb
index fbb95fe16df..f846482cdeb 100644
--- a/app/models/ci/job_artifact.rb
+++ b/app/models/ci/job_artifact.rb
@@ -9,6 +9,8 @@ module Ci
before_save :update_file_store
before_save :set_size, if: :file_changed?
+ after_save :update_project_statistics_after_save, if: :size_changed?
+ after_destroy :update_project_statistics_after_destroy, unless: :project_destroyed?
scope :with_files_stored_locally, -> { where(file_store: [nil, ::JobArtifactUploader::Store::LOCAL]) }
@@ -34,10 +36,6 @@ module Ci
[nil, ::JobArtifactUploader::Store::LOCAL].include?(self.file_store)
end
- def set_size
- self.size = file.size
- end
-
def expire_in
expire_at - Time.now if expire_at
end
@@ -48,5 +46,28 @@ module Ci
ChronicDuration.parse(value)&.seconds&.from_now
end
end
+
+ private
+
+ def set_size
+ self.size = file.size
+ end
+
+ def update_project_statistics_after_save
+ update_project_statistics(size.to_i - size_was.to_i)
+ end
+
+ def update_project_statistics_after_destroy
+ update_project_statistics(-self.size)
+ end
+
+ def update_project_statistics(difference)
+ ProjectStatistics.increment_statistic(project_id, :build_artifacts_size, difference)
+ end
+
+ def project_destroyed?
+ # Use job.project to avoid extra DB query for project
+ job.project.pending_delete?
+ end
end
end
diff --git a/app/models/project_statistics.rb b/app/models/project_statistics.rb
index 87a4350f022..5d4e3c34b39 100644
--- a/app/models/project_statistics.rb
+++ b/app/models/project_statistics.rb
@@ -4,15 +4,15 @@ class ProjectStatistics < ActiveRecord::Base
before_save :update_storage_size
- STORAGE_COLUMNS = [:repository_size, :lfs_objects_size, :build_artifacts_size].freeze
- STATISTICS_COLUMNS = [:commit_count] + STORAGE_COLUMNS
+ COLUMNS_TO_REFRESH = [:repository_size, :lfs_objects_size, :commit_count].freeze
+ INCREMENTABLE_COLUMNS = [:build_artifacts_size].freeze
def total_repository_size
repository_size + lfs_objects_size
end
def refresh!(only: nil)
- STATISTICS_COLUMNS.each do |column, generator|
+ COLUMNS_TO_REFRESH.each do |column, generator|
if only.blank? || only.include?(column)
public_send("update_#{column}") # rubocop:disable GitlabSecurity/PublicSend
end
@@ -34,13 +34,15 @@ class ProjectStatistics < ActiveRecord::Base
self.lfs_objects_size = project.lfs_objects.sum(:size)
end
- def update_build_artifacts_size
- self.build_artifacts_size =
- project.builds.sum(:artifacts_size) +
- Ci::JobArtifact.artifacts_size_for(self.project)
+ def update_storage_size
+ self.storage_size = repository_size + lfs_objects_size + build_artifacts_size
end
- def update_storage_size
- self.storage_size = STORAGE_COLUMNS.sum(&method(:read_attribute))
+ def self.increment_statistic(project_id, key, amount)
+ raise ArgumentError, "Cannot increment attribute: #{key}" unless key.in?(INCREMENTABLE_COLUMNS)
+ return if amount == 0
+
+ where(project_id: project_id)
+ .update_all(["#{key} = COALESCE(#{key}, 0) + (?)", amount])
end
end
diff --git a/app/uploaders/job_artifact_uploader.rb b/app/uploaders/job_artifact_uploader.rb
index dd86753479d..2a5a830ce4f 100644
--- a/app/uploaders/job_artifact_uploader.rb
+++ b/app/uploaders/job_artifact_uploader.rb
@@ -6,10 +6,10 @@ class JobArtifactUploader < GitlabUploader
storage_options Gitlab.config.artifacts
- def size
- return super if model.size.nil?
+ def cached_size
+ return model.size if model.size.present? && !model.file_changed?
- model.size
+ size
end
def store_dir
@@ -20,7 +20,7 @@ class JobArtifactUploader < GitlabUploader
if file_storage?
File.open(path, "rb") if path
else
- ::Gitlab::Ci::Trace::HttpIO.new(url, size) if url
+ ::Gitlab::Ci::Trace::HttpIO.new(url, cached_size) if url
end
end
diff --git a/changelogs/unreleased/41059-calculate-artifact-size-more-efficiently.yml b/changelogs/unreleased/41059-calculate-artifact-size-more-efficiently.yml
new file mode 100644
index 00000000000..e3f94bbf081
--- /dev/null
+++ b/changelogs/unreleased/41059-calculate-artifact-size-more-efficiently.yml
@@ -0,0 +1,5 @@
+---
+title: Improve DB performance of calculating total artifacts size
+merge_request: 17839
+author:
+type: performance
diff --git a/spec/models/ci/build_spec.rb b/spec/models/ci/build_spec.rb
index a12717835b0..c5e4e9c464f 100644
--- a/spec/models/ci/build_spec.rb
+++ b/spec/models/ci/build_spec.rb
@@ -1384,29 +1384,51 @@ describe Ci::Build do
end
end
- describe '#update_project_statistics' do
- let!(:build) { create(:ci_build, artifacts_size: 23) }
-
- it 'updates project statistics when the artifact size changes' do
- expect(ProjectCacheWorker).to receive(:perform_async)
- .with(build.project_id, [], [:build_artifacts_size])
+ context 'when updating the build' do
+ let(:build) { create(:ci_build, artifacts_size: 23) }
+ it 'updates project statistics' do
build.artifacts_size = 42
- build.save!
+
+ expect(build).to receive(:update_project_statistics_after_save).and_call_original
+
+ expect { build.save! }
+ .to change { build.project.statistics.reload.build_artifacts_size }
+ .by(19)
end
- it 'does not update project statistics when the artifact size stays the same' do
- expect(ProjectCacheWorker).not_to receive(:perform_async)
+ context 'when the artifact size stays the same' do
+ it 'does not update project statistics' do
+ build.name = 'changed'
- build.name = 'changed'
- build.save!
+ expect(build).not_to receive(:update_project_statistics_after_save)
+
+ build.save!
+ end
end
+ end
- it 'updates project statistics when the build is destroyed' do
- expect(ProjectCacheWorker).to receive(:perform_async)
- .with(build.project_id, [], [:build_artifacts_size])
+ context 'when destroying the build' do
+ let!(:build) { create(:ci_build, artifacts_size: 23) }
+
+ it 'updates project statistics' do
+ expect(ProjectStatistics)
+ .to receive(:increment_statistic)
+ .and_call_original
+
+ expect { build.destroy! }
+ .to change { build.project.statistics.reload.build_artifacts_size }
+ .by(-23)
+ end
+
+ context 'when the build is destroyed due to the project being destroyed' do
+ it 'does not update the project statistics' do
+ expect(ProjectStatistics)
+ .not_to receive(:increment_statistic)
- build.destroy
+ build.project.update_attributes(pending_delete: true)
+ build.project.destroy!
+ end
end
end
diff --git a/spec/models/ci/job_artifact_spec.rb b/spec/models/ci/job_artifact_spec.rb
index 1aa28434879..aad0026aa29 100644
--- a/spec/models/ci/job_artifact_spec.rb
+++ b/spec/models/ci/job_artifact_spec.rb
@@ -1,7 +1,7 @@
require 'spec_helper'
describe Ci::JobArtifact do
- set(:artifact) { create(:ci_job_artifact, :archive) }
+ let(:artifact) { create(:ci_job_artifact, :archive) }
describe "Associations" do
it { is_expected.to belong_to(:project) }
@@ -59,10 +59,32 @@ describe Ci::JobArtifact do
end
end
- describe '#set_size' do
- it 'sets the size' do
+ context 'creating the artifact' do
+ let(:project) { create(:project) }
+ let(:artifact) { create(:ci_job_artifact, :archive, project: project) }
+
+ it 'sets the size from the file size' do
expect(artifact.size).to eq(106365)
end
+
+ it 'updates the project statistics' do
+ expect { artifact }
+ .to change { project.statistics.reload.build_artifacts_size }
+ .by(106365)
+ end
+ end
+
+ context 'updating the artifact file' do
+ it 'updates the artifact size' do
+ artifact.update!(file: fixture_file_upload(File.join(Rails.root, 'spec/fixtures/dk.png')))
+ expect(artifact.size).to eq(1062)
+ end
+
+ it 'updates the project statistics' do
+ expect { artifact.update!(file: fixture_file_upload(File.join(Rails.root, 'spec/fixtures/dk.png'))) }
+ .to change { artifact.project.statistics.reload.build_artifacts_size }
+ .by(1062 - 106365)
+ end
end
describe '#file' do
@@ -118,4 +140,32 @@ describe Ci::JobArtifact do
is_expected.to be_nil
end
end
+
+ context 'when destroying the artifact' do
+ let(:project) { create(:project, :repository) }
+ let(:pipeline) { create(:ci_pipeline, project: project) }
+ let!(:build) { create(:ci_build, :artifacts, pipeline: pipeline) }
+
+ it 'updates the project statistics' do
+ artifact = build.job_artifacts.first
+
+ expect(ProjectStatistics)
+ .to receive(:increment_statistic)
+ .and_call_original
+
+ expect { artifact.destroy }
+ .to change { project.statistics.reload.build_artifacts_size }
+ .by(-106365)
+ end
+
+ context 'when it is destroyed from the project level' do
+ it 'does not update the project statistics' do
+ expect(ProjectStatistics)
+ .not_to receive(:increment_statistic)
+
+ project.update_attributes(pending_delete: true)
+ project.destroy!
+ end
+ end
+ end
end
diff --git a/spec/models/project_statistics_spec.rb b/spec/models/project_statistics_spec.rb
index 5cff2af4aca..38a3590ad12 100644
--- a/spec/models/project_statistics_spec.rb
+++ b/spec/models/project_statistics_spec.rb
@@ -4,26 +4,6 @@ describe ProjectStatistics do
let(:project) { create :project }
let(:statistics) { project.statistics }
- describe 'constants' do
- describe 'STORAGE_COLUMNS' do
- it 'is an array of symbols' do
- expect(described_class::STORAGE_COLUMNS).to be_kind_of Array
- expect(described_class::STORAGE_COLUMNS.map(&:class).uniq).to eq [Symbol]
- end
- end
-
- describe 'STATISTICS_COLUMNS' do
- it 'is an array of symbols' do
- expect(described_class::STATISTICS_COLUMNS).to be_kind_of Array
- expect(described_class::STATISTICS_COLUMNS.map(&:class).uniq).to eq [Symbol]
- end
-
- it 'includes all storage columns' do
- expect(described_class::STATISTICS_COLUMNS & described_class::STORAGE_COLUMNS).to eq described_class::STORAGE_COLUMNS
- end
- end
- end
-
describe 'associations' do
it { is_expected.to belong_to(:project) }
it { is_expected.to belong_to(:namespace) }
@@ -63,7 +43,6 @@ describe ProjectStatistics do
allow(statistics).to receive(:update_commit_count)
allow(statistics).to receive(:update_repository_size)
allow(statistics).to receive(:update_lfs_objects_size)
- allow(statistics).to receive(:update_build_artifacts_size)
allow(statistics).to receive(:update_storage_size)
end
@@ -76,7 +55,6 @@ describe ProjectStatistics do
expect(statistics).to have_received(:update_commit_count)
expect(statistics).to have_received(:update_repository_size)
expect(statistics).to have_received(:update_lfs_objects_size)
- expect(statistics).to have_received(:update_build_artifacts_size)
end
end
@@ -89,7 +67,6 @@ describe ProjectStatistics do
expect(statistics).to have_received(:update_lfs_objects_size)
expect(statistics).not_to have_received(:update_commit_count)
expect(statistics).not_to have_received(:update_repository_size)
- expect(statistics).not_to have_received(:update_build_artifacts_size)
end
end
end
@@ -131,40 +108,6 @@ describe ProjectStatistics do
end
end
- describe '#update_build_artifacts_size' do
- let!(:pipeline) { create(:ci_pipeline, project: project) }
-
- context 'when new job artifacts are calculated' do
- let(:ci_build) { create(:ci_build, pipeline: pipeline) }
-
- before do
- create(:ci_job_artifact, :archive, project: pipeline.project, job: ci_build)
- end
-
- it "stores the size of related build artifacts" do
- statistics.update_build_artifacts_size
-
- expect(statistics.build_artifacts_size).to be(106365)
- end
-
- it 'calculates related build artifacts by project' do
- expect(Ci::JobArtifact).to receive(:artifacts_size_for).with(project) { 0 }
-
- statistics.update_build_artifacts_size
- end
- end
-
- context 'when legacy artifacts are used' do
- let!(:ci_build) { create(:ci_build, pipeline: pipeline, artifacts_size: 10.megabytes) }
-
- it "stores the size of related build artifacts" do
- statistics.update_build_artifacts_size
-
- expect(statistics.build_artifacts_size).to eq(10.megabytes)
- end
- end
- end
-
describe '#update_storage_size' do
it "sums all storage counters" do
statistics.update!(
@@ -177,4 +120,27 @@ describe ProjectStatistics do
expect(statistics.storage_size).to eq 5
end
end
+
+ describe '.increment_statistic' do
+ it 'increases the statistic by that amount' do
+ expect { described_class.increment_statistic(project.id, :build_artifacts_size, 13) }
+ .to change { statistics.reload.build_artifacts_size }
+ .by(13)
+ end
+
+ context 'when the amount is 0' do
+ it 'does not execute a query' do
+ project
+ expect { described_class.increment_statistic(project.id, :build_artifacts_size, 0) }
+ .not_to exceed_query_limit(0)
+ end
+ end
+
+ context 'when using an invalid column' do
+ it 'raises an error' do
+ expect { described_class.increment_statistic(project.id, :id, 13) }
+ .to raise_error(ArgumentError, "Cannot increment attribute: id")
+ end
+ end
+ end
end