From dabd91b2c8a42ac0d0c357190002a5a4b96a57a6 Mon Sep 17 00:00:00 2001 From: Toon Claes Date: Thu, 13 Jun 2019 23:07:59 +0200 Subject: Add rake task to clean orphan artifact files This adds the rake task rake gitlab:cleanup:orphan_job_artifact_files. This rake task cleans all orphan job artifact files it can find on disk. It performs a search on the complete folder of all artifacts on disk. Then it selects every job artifact ID for which it could not find a record with a matching ID in the database. For these, the files are deleted from disk. --- .../orphan_job_artifact_files_batch_spec.rb | 66 +++++++++++++++++++++ .../cleanup/orphan_job_artifact_files_spec.rb | 68 ++++++++++++++++++++++ spec/tasks/gitlab/cleanup_rake_spec.rb | 29 +++++++++ 3 files changed, 163 insertions(+) create mode 100644 spec/lib/gitlab/cleanup/orphan_job_artifact_files_batch_spec.rb create mode 100644 spec/lib/gitlab/cleanup/orphan_job_artifact_files_spec.rb (limited to 'spec') diff --git a/spec/lib/gitlab/cleanup/orphan_job_artifact_files_batch_spec.rb b/spec/lib/gitlab/cleanup/orphan_job_artifact_files_batch_spec.rb new file mode 100644 index 00000000000..4d8edfeac80 --- /dev/null +++ b/spec/lib/gitlab/cleanup/orphan_job_artifact_files_batch_spec.rb @@ -0,0 +1,66 @@ +# frozen_string_literal: true + +require 'spec_helper' + +describe Gitlab::Cleanup::OrphanJobArtifactFilesBatch do + let(:batch_size) { 10 } + let(:dry_run) { true } + + subject(:batch) { described_class.new(batch_size: batch_size, dry_run: dry_run) } + + context 'no dry run' do + let(:dry_run) { false } + + it 'deletes only orphan job artifacts from disk' do + job_artifact = create(:ci_job_artifact, :archive) + orphan_artifact = create(:ci_job_artifact, :archive) + batch << artifact_path(job_artifact) + batch << artifact_path(orphan_artifact) + orphan_artifact.delete + + batch.clean! 
+ + expect(batch.artifact_files.count).to eq(2) + expect(batch.lost_and_found.count).to eq(1) + expect(batch.lost_and_found.first.artifact_id).to eq(orphan_artifact.id) + end + + it 'does not mix up job ID and artifact ID' do + # take maximum ID of both tables to avoid any collision + max_id = [Ci::Build.maximum(:id), Ci::JobArtifact.maximum(:id)].compact.max.to_i + job_a = create(:ci_build, id: max_id + 1) + job_b = create(:ci_build, id: max_id + 2) + # reuse the build IDs for the job artifact IDs, but swap them + job_artifact_b = create(:ci_job_artifact, :archive, job: job_b, id: max_id + 1) + job_artifact_a = create(:ci_job_artifact, :archive, job: job_a, id: max_id + 2) + + batch << artifact_path(job_artifact_a) + batch << artifact_path(job_artifact_b) + + job_artifact_b.delete + + batch.clean! + + expect(File.exist?(job_artifact_a.file.path)).to be_truthy + expect(File.exist?(job_artifact_b.file.path)).to be_falsey + end + end + + context 'with dry run' do + it 'does not remove files' do + job_artifact = create(:ci_job_artifact, :archive) + batch << job_artifact.file.path + job_artifact.delete + + expect(batch).not_to receive(:remove_file!) + + batch.clean! 
+ + expect(File.exist?(job_artifact.file.path)).to be_truthy + end + end + + def artifact_path(job_artifact) + Pathname.new(job_artifact.file.path).parent.to_s + end +end diff --git a/spec/lib/gitlab/cleanup/orphan_job_artifact_files_spec.rb b/spec/lib/gitlab/cleanup/orphan_job_artifact_files_spec.rb new file mode 100644 index 00000000000..974cc2c4660 --- /dev/null +++ b/spec/lib/gitlab/cleanup/orphan_job_artifact_files_spec.rb @@ -0,0 +1,68 @@ +# frozen_string_literal: true + +require 'spec_helper' + +describe Gitlab::Cleanup::OrphanJobArtifactFiles do + let(:null_logger) { Logger.new('/dev/null') } + subject(:cleanup) { described_class.new(logger: null_logger) } + + before do + allow(null_logger).to receive(:info) + end + + it 'passes on dry_run' do + expect(Gitlab::Cleanup::OrphanJobArtifactFilesBatch) + .to receive(:new) + .with(dry_run: false, batch_size: anything, logger: anything) + .at_least(:once) + .and_call_original + + described_class.new(dry_run: false).run! + end + + it 'errors when invalid niceness is given' do + cleanup = described_class.new(logger: null_logger, niceness: 'FooBar') + + expect(null_logger).to receive(:error).with(/FooBar/) + + cleanup.run! + end + + it 'finds artifacts on disk' do + artifact = create(:ci_job_artifact, :archive) + + expect(cleanup).to receive(:find_artifacts).and_yield(artifact.file.path) + cleanup.run! + end + + it 'stops when limit is reached' do + cleanup = described_class.new(limit: 1) + + mock_artifacts_found(cleanup, 'tmp/foo/bar/1', 'tmp/foo/bar/2') + + cleanup.run! + + expect(cleanup.total_found).to eq(1) + end + + it 'cleans even if batch is not full' do + mock_artifacts_found(cleanup, 'tmp/foo/bar/1') + + expect(cleanup).to receive(:clean_batch!).and_call_original + cleanup.run! 
+ end + + it 'cleans in batches' do + stub_const("#{described_class.name}::BATCH_SIZE", 2) + mock_artifacts_found(cleanup, 'tmp/foo/bar/1', 'tmp/foo/bar/2', 'tmp/foo/bar/3') + + expect(cleanup).to receive(:clean_batch!).twice.and_call_original + cleanup.run! + end + + def mock_artifacts_found(cleanup, *files) + mock = allow(cleanup).to receive(:find_artifacts) + + files.each { |file| mock.and_yield(file) } + end +end diff --git a/spec/tasks/gitlab/cleanup_rake_spec.rb b/spec/tasks/gitlab/cleanup_rake_spec.rb index 19794227d9f..92c094f08a4 100644 --- a/spec/tasks/gitlab/cleanup_rake_spec.rb +++ b/spec/tasks/gitlab/cleanup_rake_spec.rb @@ -156,4 +156,33 @@ describe 'gitlab:cleanup rake tasks' do end end end + + describe 'gitlab:cleanup:orphan_job_artifact_files' do + subject(:rake_task) { run_rake_task('gitlab:cleanup:orphan_job_artifact_files') } + + it 'runs the task without errors' do + expect(Gitlab::Cleanup::OrphanJobArtifactFiles) + .to receive(:new).and_call_original + + expect { rake_task }.not_to raise_error + end + + context 'with DRY_RUN set to false' do + before do + stub_env('DRY_RUN', 'false') + end + + it 'passes dry_run correctly' do + expect(Gitlab::Cleanup::OrphanJobArtifactFiles) + .to receive(:new) + .with(limit: anything, + dry_run: false, + niceness: anything, + logger: anything) + .and_call_original + + rake_task + end + end + end end -- cgit v1.2.1