summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Grzegorz Bizon <grzegorz@gitlab.com> 2017-06-01 15:43:35 +0000
committer Grzegorz Bizon <grzegorz@gitlab.com> 2017-06-01 15:43:35 +0000
commit aff097e8f528ab5b00842df7c76d3435b1e59c96 (patch)
tree b670c73655233e27df63b80111f606770d29b412
parent 6f14a3ea6ba711259dc1ae4da374d032ca49ad17 (diff)
parent 6185d12c183b539ea06ab3550b2c21045d169ca4 (diff)
download gitlab-ce-aff097e8f528ab5b00842df7c76d3435b1e59c96.tar.gz
Merge branch 'migrate-old-artifacts' into 'master'
Fix data inconsistency issue for old artifacts by moving them to a currently used path. Closes #32036. See merge request !11824.
-rw-r--r-- app/models/ci/build.rb 32
-rw-r--r-- app/uploaders/artifact_uploader.rb 28
-rw-r--r-- app/uploaders/gitlab_uploader.rb 6
-rw-r--r-- changelogs/unreleased/migrate-artifacts-to-a-new-path.yml 4
-rw-r--r-- db/post_migrate/20170523083112_migrate_old_artifacts.rb 72
-rw-r--r-- lib/api/helpers.rb 10
-rw-r--r-- lib/api/jobs.rb 10
-rw-r--r-- lib/api/runner.rb 11
-rw-r--r-- lib/api/v3/builds.rb 10
-rw-r--r-- lib/backup/artifacts.rb 2
-rw-r--r-- lib/ci/api/builds.rb 8
-rw-r--r-- spec/migrations/migrate_old_artifacts_spec.rb 117
-rw-r--r-- spec/uploaders/artifact_uploader_spec.rb 38
-rw-r--r-- spec/uploaders/gitlab_uploader_spec.rb 56
14 files changed, 323 insertions, 81 deletions
diff --git a/app/models/ci/build.rb b/app/models/ci/build.rb
index 0000ecc5bbf..cd3760fdca6 100644
--- a/app/models/ci/build.rb
+++ b/app/models/ci/build.rb
@@ -255,38 +255,6 @@ module Ci
Time.now - updated_at > 15.minutes.to_i
end
- ##
- # Deprecated
- #
- # This contains a hotfix for CI build data integrity, see #4246
- #
- # This method is used by `ArtifactUploader` to create a store_dir.
- # Warning: Uploader uses it after AND before file has been stored.
- #
- # This method returns old path to artifacts only if it already exists.
- #
- def artifacts_path
- # We need the project even if it's soft deleted, because whenever
- # we're really deleting the project, we'll also delete the builds,
- # and in order to delete the builds, we need to know where to find
- # the artifacts, which is depending on the data of the project.
- # We need to retain the project in this case.
- the_project = project || unscoped_project
-
- old = File.join(created_at.utc.strftime('%Y_%m'),
- the_project.ci_id.to_s,
- id.to_s)
-
- old_store = File.join(ArtifactUploader.artifacts_path, old)
- return old if the_project.ci_id && File.directory?(old_store)
-
- File.join(
- created_at.utc.strftime('%Y_%m'),
- the_project.id.to_s,
- id.to_s
- )
- end
-
def valid_token?(token)
self.token && ActiveSupport::SecurityUtils.variable_size_secure_compare(token, self.token)
end
diff --git a/app/uploaders/artifact_uploader.rb b/app/uploaders/artifact_uploader.rb
index 3e36ec91205..3bc0408f557 100644
--- a/app/uploaders/artifact_uploader.rb
+++ b/app/uploaders/artifact_uploader.rb
@@ -1,33 +1,35 @@
class ArtifactUploader < GitlabUploader
storage :file
- attr_accessor :build, :field
+ attr_reader :job, :field
- def self.artifacts_path
+ def self.local_artifacts_store
Gitlab.config.artifacts.path
end
def self.artifacts_upload_path
- File.join(self.artifacts_path, 'tmp/uploads/')
+ File.join(self.local_artifacts_store, 'tmp/uploads/')
end
- def self.artifacts_cache_path
- File.join(self.artifacts_path, 'tmp/cache/')
- end
-
- def initialize(build, field)
- @build, @field = build, field
+ def initialize(job, field)
+ @job, @field = job, field
end
def store_dir
- File.join(self.class.artifacts_path, @build.artifacts_path)
+ default_local_path
end
def cache_dir
- File.join(self.class.artifacts_cache_path, @build.artifacts_path)
+ File.join(self.class.local_artifacts_store, 'tmp/cache')
+ end
+
+ private
+
+ def default_local_path
+ File.join(self.class.local_artifacts_store, default_path)
end
- def filename
- file.try(:filename)
+ def default_path
+ File.join(job.created_at.utc.strftime('%Y_%m'), job.project_id.to_s, job.id.to_s)
end
end
diff --git a/app/uploaders/gitlab_uploader.rb b/app/uploaders/gitlab_uploader.rb
index e0a6c9b4067..02afddb8c6a 100644
--- a/app/uploaders/gitlab_uploader.rb
+++ b/app/uploaders/gitlab_uploader.rb
@@ -10,7 +10,11 @@ class GitlabUploader < CarrierWave::Uploader::Base
delegate :base_dir, to: :class
def file_storage?
- self.class.storage == CarrierWave::Storage::File
+ storage.is_a?(CarrierWave::Storage::File)
+ end
+
+ def file_cache_storage?
+ cache_storage.is_a?(CarrierWave::Storage::File)
end
# Reduce disk IO
diff --git a/changelogs/unreleased/migrate-artifacts-to-a-new-path.yml b/changelogs/unreleased/migrate-artifacts-to-a-new-path.yml
new file mode 100644
index 00000000000..bd022a3a91b
--- /dev/null
+++ b/changelogs/unreleased/migrate-artifacts-to-a-new-path.yml
@@ -0,0 +1,4 @@
+---
+title: Migrate artifacts to a new path
+merge_request:
+author:
diff --git a/db/post_migrate/20170523083112_migrate_old_artifacts.rb b/db/post_migrate/20170523083112_migrate_old_artifacts.rb
new file mode 100644
index 00000000000..f2690bd0017
--- /dev/null
+++ b/db/post_migrate/20170523083112_migrate_old_artifacts.rb
@@ -0,0 +1,72 @@
+class MigrateOldArtifacts < ActiveRecord::Migration
+ include Gitlab::Database::MigrationHelpers
+
+ DOWNTIME = false
+
+ disable_ddl_transaction!
+
+ # This uses special heuristic to find potential candidates for data migration
+ # Read more about this here: https://gitlab.com/gitlab-org/gitlab-ce/issues/32036#note_30422345
+
+ def up
+ builds_with_artifacts.find_each do |build|
+ build.migrate_artifacts!
+ end
+ end
+
+ def down
+ end
+
+ private
+
+ def builds_with_artifacts
+ Build.with_artifacts
+ .joins('JOIN projects ON projects.id = ci_builds.project_id')
+ .where('ci_builds.id < ?', min_id)
+ .where('projects.ci_id IS NOT NULL')
+ .select('id', 'created_at', 'project_id', 'projects.ci_id AS ci_id')
+ end
+
+ def min_id
+ Build.joins('JOIN projects ON projects.id = ci_builds.project_id')
+ .where('projects.ci_id IS NULL')
+ .pluck('coalesce(min(ci_builds.id), 0)')
+ .first
+ end
+
+ class Build < ActiveRecord::Base
+ self.table_name = 'ci_builds'
+
+ scope :with_artifacts, -> { where.not(artifacts_file: [nil, '']) }
+
+ def migrate_artifacts!
+ return unless File.exist?(source_artifacts_path)
+ return if File.exist?(target_artifacts_path)
+
+ ensure_target_path
+
+ FileUtils.move(source_artifacts_path, target_artifacts_path)
+ end
+
+ private
+
+ def source_artifacts_path
+ @source_artifacts_path ||=
+ File.join(Gitlab.config.artifacts.path,
+ created_at.utc.strftime('%Y_%m'),
+ ci_id.to_s, id.to_s)
+ end
+
+ def target_artifacts_path
+ @target_artifacts_path ||=
+ File.join(Gitlab.config.artifacts.path,
+ created_at.utc.strftime('%Y_%m'),
+ project_id.to_s, id.to_s)
+ end
+
+ def ensure_target_path
+ directory = File.dirname(target_artifacts_path)
+ FileUtils.mkdir_p(directory) unless Dir.exist?(directory)
+ end
+ end
+end
diff --git a/lib/api/helpers.rb b/lib/api/helpers.rb
index d61450f8258..81f6fc3201d 100644
--- a/lib/api/helpers.rb
+++ b/lib/api/helpers.rb
@@ -311,6 +311,16 @@ module API
end
end
+ def present_artifacts!(artifacts_file)
+ return not_found! unless artifacts_file.exists?
+
+ if artifacts_file.file_storage?
+ present_file!(artifacts_file.path, artifacts_file.filename)
+ else
+ redirect_to(artifacts_file.url)
+ end
+ end
+
private
def private_token
diff --git a/lib/api/jobs.rb b/lib/api/jobs.rb
index 0223957fde1..8a67de10bca 100644
--- a/lib/api/jobs.rb
+++ b/lib/api/jobs.rb
@@ -224,16 +224,6 @@ module API
find_build(id) || not_found!
end
- def present_artifacts!(artifacts_file)
- if !artifacts_file.file_storage?
- redirect_to(build.artifacts_file.url)
- elsif artifacts_file.exists?
- present_file!(artifacts_file.path, artifacts_file.filename)
- else
- not_found!
- end
- end
-
def filter_builds(builds, scope)
return builds if scope.nil? || scope.empty?
diff --git a/lib/api/runner.rb b/lib/api/runner.rb
index 6fbb02cb3aa..3fd0536dadd 100644
--- a/lib/api/runner.rb
+++ b/lib/api/runner.rb
@@ -241,16 +241,7 @@ module API
get '/:id/artifacts' do
job = authenticate_job!
- artifacts_file = job.artifacts_file
- unless artifacts_file.file_storage?
- return redirect_to job.artifacts_file.url
- end
-
- unless artifacts_file.exists?
- not_found!
- end
-
- present_file!(artifacts_file.path, artifacts_file.filename)
+ present_artifacts!(job.artifacts_file)
end
end
end
diff --git a/lib/api/v3/builds.rb b/lib/api/v3/builds.rb
index 21935922414..93ad9eb26b8 100644
--- a/lib/api/v3/builds.rb
+++ b/lib/api/v3/builds.rb
@@ -225,16 +225,6 @@ module API
find_build(id) || not_found!
end
- def present_artifacts!(artifacts_file)
- if !artifacts_file.file_storage?
- redirect_to(build.artifacts_file.url)
- elsif artifacts_file.exists?
- present_file!(artifacts_file.path, artifacts_file.filename)
- else
- not_found!
- end
- end
-
def filter_builds(builds, scope)
return builds if scope.nil? || scope.empty?
diff --git a/lib/backup/artifacts.rb b/lib/backup/artifacts.rb
index 51fa3867e67..1f4bda6f588 100644
--- a/lib/backup/artifacts.rb
+++ b/lib/backup/artifacts.rb
@@ -3,7 +3,7 @@ require 'backup/files'
module Backup
class Artifacts < Files
def initialize
- super('artifacts', ArtifactUploader.artifacts_path)
+ super('artifacts', ArtifactUploader.local_artifacts_store)
end
def create_files_dir
diff --git a/lib/ci/api/builds.rb b/lib/ci/api/builds.rb
index 67b269b330c..2285ef241d7 100644
--- a/lib/ci/api/builds.rb
+++ b/lib/ci/api/builds.rb
@@ -187,14 +187,14 @@ module Ci
build = authenticate_build!
artifacts_file = build.artifacts_file
- unless artifacts_file.file_storage?
- return redirect_to build.artifacts_file.url
- end
-
unless artifacts_file.exists?
not_found!
end
+ unless artifacts_file.file_storage?
+ return redirect_to build.artifacts_file.url
+ end
+
present_file!(artifacts_file.path, artifacts_file.filename)
end
diff --git a/spec/migrations/migrate_old_artifacts_spec.rb b/spec/migrations/migrate_old_artifacts_spec.rb
new file mode 100644
index 00000000000..50f4bbda001
--- /dev/null
+++ b/spec/migrations/migrate_old_artifacts_spec.rb
@@ -0,0 +1,117 @@
+# encoding: utf-8
+
+require 'spec_helper'
+require Rails.root.join('db', 'post_migrate', '20170523083112_migrate_old_artifacts.rb')
+
+describe MigrateOldArtifacts do
+ let(:migration) { described_class.new }
+ let!(:directory) { Dir.mktmpdir }
+
+ before do
+ allow(Gitlab.config.artifacts).to receive(:path).and_return(directory)
+ end
+
+ after do
+ FileUtils.remove_entry_secure(directory)
+ end
+
+ context 'with migratable data' do
+ let(:project1) { create(:empty_project, ci_id: 2) }
+ let(:project2) { create(:empty_project, ci_id: 3) }
+ let(:project3) { create(:empty_project) }
+
+ let(:pipeline1) { create(:ci_empty_pipeline, project: project1) }
+ let(:pipeline2) { create(:ci_empty_pipeline, project: project2) }
+ let(:pipeline3) { create(:ci_empty_pipeline, project: project3) }
+
+ let!(:build_with_legacy_artifacts) { create(:ci_build, pipeline: pipeline1) }
+ let!(:build_without_artifacts) { create(:ci_build, pipeline: pipeline1) }
+ let!(:build2) { create(:ci_build, :artifacts, pipeline: pipeline2) }
+ let!(:build3) { create(:ci_build, :artifacts, pipeline: pipeline3) }
+
+ before do
+ store_artifacts_in_legacy_path(build_with_legacy_artifacts)
+ end
+
+ it "legacy artifacts are not accessible" do
+ expect(build_with_legacy_artifacts.artifacts?).to be_falsey
+ end
+
+ it "legacy artifacts are set" do
+ expect(build_with_legacy_artifacts.artifacts_file_identifier).not_to be_nil
+ end
+
+ describe '#min_id' do
+ subject { migration.send(:min_id) }
+
+ it 'returns the newest build for which ci_id is not defined' do
+ is_expected.to eq(build3.id)
+ end
+ end
+
+ describe '#builds_with_artifacts' do
+ subject { migration.send(:builds_with_artifacts).map(&:id) }
+
+ it 'returns a list of builds that has artifacts and could be migrated' do
+ is_expected.to contain_exactly(build_with_legacy_artifacts.id, build2.id)
+ end
+ end
+
+ describe '#up' do
+ context 'when migrating artifacts' do
+ before do
+ migration.up
+ end
+
+ it 'all files do have artifacts' do
+ Ci::Build.with_artifacts do |build|
+ expect(build).to have_artifacts
+ end
+ end
+
+ it 'artifacts are no longer present on legacy path' do
+ expect(File.exist?(legacy_path(build_with_legacy_artifacts))).to eq(false)
+ end
+ end
+
+ context 'when there are aritfacts in old and new directory' do
+ before do
+ store_artifacts_in_legacy_path(build2)
+
+ migration.up
+ end
+
+ it 'does not move old files' do
+ expect(File.exist?(legacy_path(build2))).to eq(true)
+ end
+ end
+ end
+
+ private
+
+ def store_artifacts_in_legacy_path(build)
+ FileUtils.mkdir_p(legacy_path(build))
+
+ FileUtils.copy(
+ Rails.root.join('spec/fixtures/ci_build_artifacts.zip'),
+ File.join(legacy_path(build), "ci_build_artifacts.zip"))
+
+ FileUtils.copy(
+ Rails.root.join('spec/fixtures/ci_build_artifacts_metadata.gz'),
+ File.join(legacy_path(build), "ci_build_artifacts_metadata.gz"))
+
+ build.update_columns(
+ artifacts_file: 'ci_build_artifacts.zip',
+ artifacts_metadata: 'ci_build_artifacts_metadata.gz')
+
+ build.reload
+ end
+
+ def legacy_path(build)
+ File.join(directory,
+ build.created_at.utc.strftime('%Y_%m'),
+ build.project.ci_id.to_s,
+ build.id.to_s)
+ end
+ end
+end
diff --git a/spec/uploaders/artifact_uploader_spec.rb b/spec/uploaders/artifact_uploader_spec.rb
new file mode 100644
index 00000000000..24e2e3a9f0e
--- /dev/null
+++ b/spec/uploaders/artifact_uploader_spec.rb
@@ -0,0 +1,38 @@
+require 'rails_helper'
+
+describe ArtifactUploader do
+ let(:job) { create(:ci_build) }
+ let(:uploader) { described_class.new(job, :artifacts_file) }
+ let(:path) { Gitlab.config.artifacts.path }
+
+ describe '.local_artifacts_store' do
+ subject { described_class.local_artifacts_store }
+
+ it "delegate to artifacts path" do
+ expect(Gitlab.config.artifacts).to receive(:path)
+
+ subject
+ end
+ end
+
+ describe '.artifacts_upload_path' do
+ subject { described_class.artifacts_upload_path }
+
+ it { is_expected.to start_with(path) }
+ it { is_expected.to end_with('tmp/uploads/') }
+ end
+
+ describe '#store_dir' do
+ subject { uploader.store_dir }
+
+ it { is_expected.to start_with(path) }
+ it { is_expected.to end_with("#{job.project_id}/#{job.id}") }
+ end
+
+ describe '#cache_dir' do
+ subject { uploader.cache_dir }
+
+ it { is_expected.to start_with(path) }
+ it { is_expected.to end_with('tmp/cache') }
+ end
+end
diff --git a/spec/uploaders/gitlab_uploader_spec.rb b/spec/uploaders/gitlab_uploader_spec.rb
new file mode 100644
index 00000000000..78e9d9cf46c
--- /dev/null
+++ b/spec/uploaders/gitlab_uploader_spec.rb
@@ -0,0 +1,56 @@
+require 'rails_helper'
+require 'carrierwave/storage/fog'
+
+describe GitlabUploader do
+ let(:uploader_class) { Class.new(described_class) }
+
+ subject { uploader_class.new }
+
+ describe '#file_storage?' do
+ context 'when file storage is used' do
+ before do
+ uploader_class.storage(:file)
+ end
+
+ it { is_expected.to be_file_storage }
+ end
+
+ context 'when is remote storage' do
+ before do
+ uploader_class.storage(:fog)
+ end
+
+ it { is_expected.not_to be_file_storage }
+ end
+ end
+
+ describe '#file_cache_storage?' do
+ context 'when file storage is used' do
+ before do
+ uploader_class.cache_storage(:file)
+ end
+
+ it { is_expected.to be_file_cache_storage }
+ end
+
+ context 'when is remote storage' do
+ before do
+ uploader_class.cache_storage(:fog)
+ end
+
+ it { is_expected.not_to be_file_cache_storage }
+ end
+ end
+
+ describe '#move_to_cache' do
+ it 'is true' do
+ expect(subject.move_to_cache).to eq(true)
+ end
+ end
+
+ describe '#move_to_store' do
+ it 'is true' do
+ expect(subject.move_to_store).to eq(true)
+ end
+ end
+end