diff options
author | Grzegorz Bizon <grzegorz@gitlab.com> | 2017-06-01 15:43:35 +0000 |
---|---|---|
committer | Grzegorz Bizon <grzegorz@gitlab.com> | 2017-06-01 15:43:35 +0000 |
commit | aff097e8f528ab5b00842df7c76d3435b1e59c96 (patch) | |
tree | b670c73655233e27df63b80111f606770d29b412 | |
parent | 6f14a3ea6ba711259dc1ae4da374d032ca49ad17 (diff) | |
parent | 6185d12c183b539ea06ab3550b2c21045d169ca4 (diff) | |
download | gitlab-ce-aff097e8f528ab5b00842df7c76d3435b1e59c96.tar.gz |
Merge branch 'migrate-old-artifacts' into 'master'
Fix data inconsistency issue for old artifacts by moving them to a currently used path
Closes #32036
See merge request !11824
-rw-r--r-- | app/models/ci/build.rb | 32 | ||||
-rw-r--r-- | app/uploaders/artifact_uploader.rb | 28 | ||||
-rw-r--r-- | app/uploaders/gitlab_uploader.rb | 6 | ||||
-rw-r--r-- | changelogs/unreleased/migrate-artifacts-to-a-new-path.yml | 4 | ||||
-rw-r--r-- | db/post_migrate/20170523083112_migrate_old_artifacts.rb | 72 | ||||
-rw-r--r-- | lib/api/helpers.rb | 10 | ||||
-rw-r--r-- | lib/api/jobs.rb | 10 | ||||
-rw-r--r-- | lib/api/runner.rb | 11 | ||||
-rw-r--r-- | lib/api/v3/builds.rb | 10 | ||||
-rw-r--r-- | lib/backup/artifacts.rb | 2 | ||||
-rw-r--r-- | lib/ci/api/builds.rb | 8 | ||||
-rw-r--r-- | spec/migrations/migrate_old_artifacts_spec.rb | 117 | ||||
-rw-r--r-- | spec/uploaders/artifact_uploader_spec.rb | 38 | ||||
-rw-r--r-- | spec/uploaders/gitlab_uploader_spec.rb | 56 |
14 files changed, 323 insertions, 81 deletions
diff --git a/app/models/ci/build.rb b/app/models/ci/build.rb index 0000ecc5bbf..cd3760fdca6 100644 --- a/app/models/ci/build.rb +++ b/app/models/ci/build.rb @@ -255,38 +255,6 @@ module Ci Time.now - updated_at > 15.minutes.to_i end - ## - # Deprecated - # - # This contains a hotfix for CI build data integrity, see #4246 - # - # This method is used by `ArtifactUploader` to create a store_dir. - # Warning: Uploader uses it after AND before file has been stored. - # - # This method returns old path to artifacts only if it already exists. - # - def artifacts_path - # We need the project even if it's soft deleted, because whenever - # we're really deleting the project, we'll also delete the builds, - # and in order to delete the builds, we need to know where to find - # the artifacts, which is depending on the data of the project. - # We need to retain the project in this case. - the_project = project || unscoped_project - - old = File.join(created_at.utc.strftime('%Y_%m'), - the_project.ci_id.to_s, - id.to_s) - - old_store = File.join(ArtifactUploader.artifacts_path, old) - return old if the_project.ci_id && File.directory?(old_store) - - File.join( - created_at.utc.strftime('%Y_%m'), - the_project.id.to_s, - id.to_s - ) - end - def valid_token?(token) self.token && ActiveSupport::SecurityUtils.variable_size_secure_compare(token, self.token) end diff --git a/app/uploaders/artifact_uploader.rb b/app/uploaders/artifact_uploader.rb index 3e36ec91205..3bc0408f557 100644 --- a/app/uploaders/artifact_uploader.rb +++ b/app/uploaders/artifact_uploader.rb @@ -1,33 +1,35 @@ class ArtifactUploader < GitlabUploader storage :file - attr_accessor :build, :field + attr_reader :job, :field - def self.artifacts_path + def self.local_artifacts_store Gitlab.config.artifacts.path end def self.artifacts_upload_path - File.join(self.artifacts_path, 'tmp/uploads/') + File.join(self.local_artifacts_store, 'tmp/uploads/') end - def self.artifacts_cache_path - File.join(self.artifacts_path, 'tmp/cache/') - end - - def initialize(build, field) - @build, @field = build, field + def initialize(job, field) + @job, @field = job, field end def store_dir - File.join(self.class.artifacts_path, @build.artifacts_path) + default_local_path end def cache_dir - File.join(self.class.artifacts_cache_path, @build.artifacts_path) + File.join(self.class.local_artifacts_store, 'tmp/cache') + end + + private + + def default_local_path + File.join(self.class.local_artifacts_store, default_path) end - def filename - file.try(:filename) + def default_path + File.join(job.created_at.utc.strftime('%Y_%m'), job.project_id.to_s, job.id.to_s) end end diff --git a/app/uploaders/gitlab_uploader.rb b/app/uploaders/gitlab_uploader.rb index e0a6c9b4067..02afddb8c6a 100644 --- a/app/uploaders/gitlab_uploader.rb +++ b/app/uploaders/gitlab_uploader.rb @@ -10,7 +10,11 @@ class GitlabUploader < CarrierWave::Uploader::Base delegate :base_dir, to: :class def file_storage? - self.class.storage == CarrierWave::Storage::File + storage.is_a?(CarrierWave::Storage::File) + end + + def file_cache_storage? + cache_storage.is_a?(CarrierWave::Storage::File) end # Reduce disk IO diff --git a/changelogs/unreleased/migrate-artifacts-to-a-new-path.yml b/changelogs/unreleased/migrate-artifacts-to-a-new-path.yml new file mode 100644 index 00000000000..bd022a3a91b --- /dev/null +++ b/changelogs/unreleased/migrate-artifacts-to-a-new-path.yml @@ -0,0 +1,4 @@ +--- +title: Migrate artifacts to a new path +merge_request: +author: diff --git a/db/post_migrate/20170523083112_migrate_old_artifacts.rb b/db/post_migrate/20170523083112_migrate_old_artifacts.rb new file mode 100644 index 00000000000..f2690bd0017 --- /dev/null +++ b/db/post_migrate/20170523083112_migrate_old_artifacts.rb @@ -0,0 +1,72 @@ +class MigrateOldArtifacts < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + disable_ddl_transaction! + + # This uses special heuristic to find potential candidates for data migration + # Read more about this here: https://gitlab.com/gitlab-org/gitlab-ce/issues/32036#note_30422345 + + def up + builds_with_artifacts.find_each do |build| + build.migrate_artifacts! + end + end + + def down + end + + private + + def builds_with_artifacts + Build.with_artifacts + .joins('JOIN projects ON projects.id = ci_builds.project_id') + .where('ci_builds.id < ?', min_id) + .where('projects.ci_id IS NOT NULL') + .select('id', 'created_at', 'project_id', 'projects.ci_id AS ci_id') + end + + def min_id + Build.joins('JOIN projects ON projects.id = ci_builds.project_id') + .where('projects.ci_id IS NULL') + .pluck('coalesce(min(ci_builds.id), 0)') + .first + end + + class Build < ActiveRecord::Base + self.table_name = 'ci_builds' + + scope :with_artifacts, -> { where.not(artifacts_file: [nil, '']) } + + def migrate_artifacts! + return unless File.exist?(source_artifacts_path) + return if File.exist?(target_artifacts_path) + + ensure_target_path + + FileUtils.move(source_artifacts_path, target_artifacts_path) + end + + private + + def source_artifacts_path + @source_artifacts_path ||= + File.join(Gitlab.config.artifacts.path, + created_at.utc.strftime('%Y_%m'), + ci_id.to_s, id.to_s) + end + + def target_artifacts_path + @target_artifacts_path ||= + File.join(Gitlab.config.artifacts.path, + created_at.utc.strftime('%Y_%m'), + project_id.to_s, id.to_s) + end + + def ensure_target_path + directory = File.dirname(target_artifacts_path) + FileUtils.mkdir_p(directory) unless Dir.exist?(directory) + end + end +end diff --git a/lib/api/helpers.rb b/lib/api/helpers.rb index d61450f8258..81f6fc3201d 100644 --- a/lib/api/helpers.rb +++ b/lib/api/helpers.rb @@ -311,6 +311,16 @@ module API end end + def present_artifacts!(artifacts_file) + return not_found! unless artifacts_file.exists? + + if artifacts_file.file_storage? + present_file!(artifacts_file.path, artifacts_file.filename) + else + redirect_to(artifacts_file.url) + end + end + private def private_token diff --git a/lib/api/jobs.rb b/lib/api/jobs.rb index 0223957fde1..8a67de10bca 100644 --- a/lib/api/jobs.rb +++ b/lib/api/jobs.rb @@ -224,16 +224,6 @@ module API find_build(id) || not_found! end - def present_artifacts!(artifacts_file) - if !artifacts_file.file_storage? - redirect_to(build.artifacts_file.url) - elsif artifacts_file.exists? - present_file!(artifacts_file.path, artifacts_file.filename) - else - not_found! - end - end - def filter_builds(builds, scope) return builds if scope.nil? || scope.empty? diff --git a/lib/api/runner.rb b/lib/api/runner.rb index 6fbb02cb3aa..3fd0536dadd 100644 --- a/lib/api/runner.rb +++ b/lib/api/runner.rb @@ -241,16 +241,7 @@ module API get '/:id/artifacts' do job = authenticate_job! - artifacts_file = job.artifacts_file - unless artifacts_file.file_storage? - return redirect_to job.artifacts_file.url - end - - unless artifacts_file.exists? - not_found! - end - - present_file!(artifacts_file.path, artifacts_file.filename) + present_artifacts!(job.artifacts_file) end end end diff --git a/lib/api/v3/builds.rb b/lib/api/v3/builds.rb index 21935922414..93ad9eb26b8 100644 --- a/lib/api/v3/builds.rb +++ b/lib/api/v3/builds.rb @@ -225,16 +225,6 @@ module API find_build(id) || not_found! end - def present_artifacts!(artifacts_file) - if !artifacts_file.file_storage? - redirect_to(build.artifacts_file.url) - elsif artifacts_file.exists? - present_file!(artifacts_file.path, artifacts_file.filename) - else - not_found! - end - end - def filter_builds(builds, scope) return builds if scope.nil? || scope.empty? diff --git a/lib/backup/artifacts.rb b/lib/backup/artifacts.rb index 51fa3867e67..1f4bda6f588 100644 --- a/lib/backup/artifacts.rb +++ b/lib/backup/artifacts.rb @@ -3,7 +3,7 @@ require 'backup/files' module Backup class Artifacts < Files def initialize - super('artifacts', ArtifactUploader.artifacts_path) + super('artifacts', ArtifactUploader.local_artifacts_store) end def create_files_dir diff --git a/lib/ci/api/builds.rb b/lib/ci/api/builds.rb index 67b269b330c..2285ef241d7 100644 --- a/lib/ci/api/builds.rb +++ b/lib/ci/api/builds.rb @@ -187,14 +187,14 @@ module Ci build = authenticate_build! artifacts_file = build.artifacts_file - unless artifacts_file.file_storage? - return redirect_to build.artifacts_file.url - end - unless artifacts_file.exists? not_found! end + unless artifacts_file.file_storage? + return redirect_to build.artifacts_file.url + end + present_file!(artifacts_file.path, artifacts_file.filename) end diff --git a/spec/migrations/migrate_old_artifacts_spec.rb b/spec/migrations/migrate_old_artifacts_spec.rb new file mode 100644 index 00000000000..50f4bbda001 --- /dev/null +++ b/spec/migrations/migrate_old_artifacts_spec.rb @@ -0,0 +1,117 @@ +# encoding: utf-8 + +require 'spec_helper' +require Rails.root.join('db', 'post_migrate', '20170523083112_migrate_old_artifacts.rb') + +describe MigrateOldArtifacts do + let(:migration) { described_class.new } + let!(:directory) { Dir.mktmpdir } + + before do + allow(Gitlab.config.artifacts).to receive(:path).and_return(directory) + end + + after do + FileUtils.remove_entry_secure(directory) + end + + context 'with migratable data' do + let(:project1) { create(:empty_project, ci_id: 2) } + let(:project2) { create(:empty_project, ci_id: 3) } + let(:project3) { create(:empty_project) } + + let(:pipeline1) { create(:ci_empty_pipeline, project: project1) } + let(:pipeline2) { create(:ci_empty_pipeline, project: project2) } + let(:pipeline3) { create(:ci_empty_pipeline, project: project3) } + + let!(:build_with_legacy_artifacts) { create(:ci_build, pipeline: pipeline1) } + let!(:build_without_artifacts) { create(:ci_build, pipeline: pipeline1) } + let!(:build2) { create(:ci_build, :artifacts, pipeline: pipeline2) } + let!(:build3) { create(:ci_build, :artifacts, pipeline: pipeline3) } + + before do + store_artifacts_in_legacy_path(build_with_legacy_artifacts) + end + + it "legacy artifacts are not accessible" do + expect(build_with_legacy_artifacts.artifacts?).to be_falsey + end + + it "legacy artifacts are set" do + expect(build_with_legacy_artifacts.artifacts_file_identifier).not_to be_nil + end + + describe '#min_id' do + subject { migration.send(:min_id) } + + it 'returns the newest build for which ci_id is not defined' do + is_expected.to eq(build3.id) + end + end + + describe '#builds_with_artifacts' do + subject { migration.send(:builds_with_artifacts).map(&:id) } + + it 'returns a list of builds that has artifacts and could be migrated' do + is_expected.to contain_exactly(build_with_legacy_artifacts.id, build2.id) + end + end + + describe '#up' do + context 'when migrating artifacts' do + before do + migration.up + end + + it 'all files do have artifacts' do + Ci::Build.with_artifacts do |build| + expect(build).to have_artifacts + end + end + + it 'artifacts are no longer present on legacy path' do + expect(File.exist?(legacy_path(build_with_legacy_artifacts))).to eq(false) + end + end + + context 'when there are aritfacts in old and new directory' do + before do + store_artifacts_in_legacy_path(build2) + + migration.up + end + + it 'does not move old files' do + expect(File.exist?(legacy_path(build2))).to eq(true) + end + end + end + + private + + def store_artifacts_in_legacy_path(build) + FileUtils.mkdir_p(legacy_path(build)) + + FileUtils.copy( + Rails.root.join('spec/fixtures/ci_build_artifacts.zip'), + File.join(legacy_path(build), "ci_build_artifacts.zip")) + + FileUtils.copy( + Rails.root.join('spec/fixtures/ci_build_artifacts_metadata.gz'), + File.join(legacy_path(build), "ci_build_artifacts_metadata.gz")) + + build.update_columns( + artifacts_file: 'ci_build_artifacts.zip', + artifacts_metadata: 'ci_build_artifacts_metadata.gz') + + build.reload + end + + def legacy_path(build) + File.join(directory, + build.created_at.utc.strftime('%Y_%m'), + build.project.ci_id.to_s, + build.id.to_s) + end + end +end diff --git a/spec/uploaders/artifact_uploader_spec.rb b/spec/uploaders/artifact_uploader_spec.rb new file mode 100644 index 00000000000..24e2e3a9f0e --- /dev/null +++ b/spec/uploaders/artifact_uploader_spec.rb @@ -0,0 +1,38 @@ +require 'rails_helper' + +describe ArtifactUploader do + let(:job) { create(:ci_build) } + let(:uploader) { described_class.new(job, :artifacts_file) } + let(:path) { Gitlab.config.artifacts.path } + + describe '.local_artifacts_store' do + subject { described_class.local_artifacts_store } + + it "delegate to artifacts path" do + expect(Gitlab.config.artifacts).to receive(:path) + + subject + end + end + + describe '.artifacts_upload_path' do + subject { described_class.artifacts_upload_path } + + it { is_expected.to start_with(path) } + it { is_expected.to end_with('tmp/uploads/') } + end + + describe '#store_dir' do + subject { uploader.store_dir } + + it { is_expected.to start_with(path) } + it { is_expected.to end_with("#{job.project_id}/#{job.id}") } + end + + describe '#cache_dir' do + subject { uploader.cache_dir } + + it { is_expected.to start_with(path) } + it { is_expected.to end_with('tmp/cache') } + end +end diff --git a/spec/uploaders/gitlab_uploader_spec.rb b/spec/uploaders/gitlab_uploader_spec.rb new file mode 100644 index 00000000000..78e9d9cf46c --- /dev/null +++ b/spec/uploaders/gitlab_uploader_spec.rb @@ -0,0 +1,56 @@ +require 'rails_helper' +require 'carrierwave/storage/fog' + +describe GitlabUploader do + let(:uploader_class) { Class.new(described_class) } + + subject { uploader_class.new } + + describe '#file_storage?' do + context 'when file storage is used' do + before do + uploader_class.storage(:file) + end + + it { is_expected.to be_file_storage } + end + + context 'when is remote storage' do + before do + uploader_class.storage(:fog) + end + + it { is_expected.not_to be_file_storage } + end + end + + describe '#file_cache_storage?' do + context 'when file storage is used' do + before do + uploader_class.cache_storage(:file) + end + + it { is_expected.to be_file_cache_storage } + end + + context 'when is remote storage' do + before do + uploader_class.cache_storage(:fog) + end + + it { is_expected.not_to be_file_cache_storage } + end + end + + describe '#move_to_cache' do + it 'is true' do + expect(subject.move_to_cache).to eq(true) + end + end + + describe '#move_to_store' do + it 'is true' do + expect(subject.move_to_store).to eq(true) + end + end +end |