summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorSean McGivern <sean@mcgivern.me.uk>2018-03-27 08:20:03 +0000
committerSean McGivern <sean@mcgivern.me.uk>2018-03-27 08:20:03 +0000
commit835fe4d324b5a89c09f06d74d5ddc73a2c41a38b (patch)
tree471a633221bf9d44bf4af94ff3137a94a2bf1407 /lib
parentab8f13c3ef6e07eb8d44805dc9eef4b008e1bbe9 (diff)
parent6d63a098f9a40f87ad922fa4c0ddcfb754e60023 (diff)
downloadgitlab-ce-835fe4d324b5a89c09f06d74d5ddc73a2c41a38b.tar.gz
Merge branch '40781-os-to-ce' into 'master'
Bring Object Storage to CE Closes #4171, #4163, #3370, #2841, and #29203 See merge request gitlab-org/gitlab-ce!17358
Diffstat (limited to 'lib')
-rw-r--r--lib/api/helpers.rb16
-rw-r--r--lib/api/job_artifacts.rb4
-rw-r--r--lib/api/jobs.rb2
-rw-r--r--lib/api/project_export.rb2
-rw-r--r--lib/api/runner.rb3
-rw-r--r--lib/api/v3/builds.rb6
-rw-r--r--lib/gitlab/ci/trace/http_io.rb187
-rw-r--r--lib/gitlab/ci/trace/stream.rb2
-rw-r--r--lib/gitlab/verify/lfs_objects.rb2
-rw-r--r--lib/gitlab/verify/uploads.rb2
-rw-r--r--lib/tasks/gitlab/artifacts/migrate.rake25
-rw-r--r--lib/tasks/gitlab/lfs/migrate.rake22
-rw-r--r--lib/tasks/gitlab/uploads/migrate.rake33
13 files changed, 289 insertions, 17 deletions
diff --git a/lib/api/helpers.rb b/lib/api/helpers.rb
index e4fca77ab5d..e59e8a45908 100644
--- a/lib/api/helpers.rb
+++ b/lib/api/helpers.rb
@@ -410,7 +410,7 @@ module API
)
end
- def present_file!(path, filename, content_type = 'application/octet-stream')
+ def present_disk_file!(path, filename, content_type = 'application/octet-stream')
filename ||= File.basename(path)
header['Content-Disposition'] = "attachment; filename=#{filename}"
header['Content-Transfer-Encoding'] = 'binary'
@@ -426,13 +426,17 @@ module API
end
end
- def present_artifacts!(artifacts_file)
- return not_found! unless artifacts_file.exists?
+ def present_carrierwave_file!(file, supports_direct_download: true)
+ return not_found! unless file.exists?
- if artifacts_file.file_storage?
- present_file!(artifacts_file.path, artifacts_file.filename)
+ if file.file_storage?
+ present_disk_file!(file.path, file.filename)
+ elsif supports_direct_download && file.class.direct_download_enabled?
+ redirect(file.url)
else
- redirect_to(artifacts_file.url)
+ header(*Gitlab::Workhorse.send_url(file.url))
+ status :ok
+ body
end
end
diff --git a/lib/api/job_artifacts.rb b/lib/api/job_artifacts.rb
index 47e5eeab31d..b1adef49d46 100644
--- a/lib/api/job_artifacts.rb
+++ b/lib/api/job_artifacts.rb
@@ -28,7 +28,7 @@ module API
builds = user_project.latest_successful_builds_for(params[:ref_name])
latest_build = builds.find_by!(name: params[:job])
- present_artifacts!(latest_build.artifacts_file)
+ present_carrierwave_file!(latest_build.artifacts_file)
end
desc 'Download the artifacts archive from a job' do
@@ -43,7 +43,7 @@ module API
build = find_build!(params[:job_id])
- present_artifacts!(build.artifacts_file)
+ present_carrierwave_file!(build.artifacts_file)
end
desc 'Download a specific file from artifacts archive' do
diff --git a/lib/api/jobs.rb b/lib/api/jobs.rb
index 9c205514b3a..60911c8d733 100644
--- a/lib/api/jobs.rb
+++ b/lib/api/jobs.rb
@@ -72,7 +72,7 @@ module API
present build, with: Entities::Job
end
- # TODO: We should use `present_file!` and leave this implementation for backward compatibility (when build trace
+ # TODO: We should use `present_disk_file!` and leave this implementation for backward compatibility (when build trace
# is saved in the DB instead of file). But before that, we need to consider how to replace the value of
# `runners_token` with some mask (like `xxxxxx`) when sending trace file directly by workhorse.
desc 'Get a trace of a specific job of a project'
diff --git a/lib/api/project_export.rb b/lib/api/project_export.rb
index b0a7fd6f4ab..efc4a33ae1b 100644
--- a/lib/api/project_export.rb
+++ b/lib/api/project_export.rb
@@ -25,7 +25,7 @@ module API
render_api_error!('404 Not found or has expired', 404) unless path
- present_file!(path, File.basename(path), 'application/gzip')
+ present_disk_file!(path, File.basename(path), 'application/gzip')
end
desc 'Start export' do
diff --git a/lib/api/runner.rb b/lib/api/runner.rb
index 7e6c33ec33d..8da97a97754 100644
--- a/lib/api/runner.rb
+++ b/lib/api/runner.rb
@@ -244,11 +244,12 @@ module API
params do
requires :id, type: Integer, desc: %q(Job's ID)
optional :token, type: String, desc: %q(Job's authentication token)
+ optional :direct_download, default: false, type: Boolean, desc: %q(Perform direct download from remote storage instead of proxying artifacts)
end
get '/:id/artifacts' do
job = authenticate_job!
- present_artifacts!(job.artifacts_file)
+ present_carrierwave_file!(job.artifacts_file, supports_direct_download: params[:direct_download])
end
end
end
diff --git a/lib/api/v3/builds.rb b/lib/api/v3/builds.rb
index ac76fece931..683b9c993cb 100644
--- a/lib/api/v3/builds.rb
+++ b/lib/api/v3/builds.rb
@@ -85,7 +85,7 @@ module API
build = get_build!(params[:build_id])
- present_artifacts!(build.artifacts_file)
+ present_carrierwave_file!(build.artifacts_file)
end
desc 'Download the artifacts file from build' do
@@ -102,10 +102,10 @@ module API
builds = user_project.latest_successful_builds_for(params[:ref_name])
latest_build = builds.find_by!(name: params[:job])
- present_artifacts!(latest_build.artifacts_file)
+ present_carrierwave_file!(latest_build.artifacts_file)
end
- # TODO: We should use `present_file!` and leave this implementation for backward compatibility (when build trace
+ # TODO: We should use `present_disk_file!` and leave this implementation for backward compatibility (when build trace
# is saved in the DB instead of file). But before that, we need to consider how to replace the value of
# `runners_token` with some mask (like `xxxxxx`) when sending trace file directly by workhorse.
desc 'Get a trace of a specific build of a project'
diff --git a/lib/gitlab/ci/trace/http_io.rb b/lib/gitlab/ci/trace/http_io.rb
new file mode 100644
index 00000000000..ac4308f4e2c
--- /dev/null
+++ b/lib/gitlab/ci/trace/http_io.rb
@@ -0,0 +1,187 @@
+##
+# This class is compatible with IO class (https://ruby-doc.org/core-2.3.1/IO.html)
+# source: https://gitlab.com/snippets/1685610
+module Gitlab
+ module Ci
+ class Trace
+ class HttpIO
+ BUFFER_SIZE = 128.kilobytes
+
+ InvalidURLError = Class.new(StandardError)
+ FailedToGetChunkError = Class.new(StandardError)
+
+ attr_reader :uri, :size
+ attr_reader :tell
+ attr_reader :chunk, :chunk_range
+
+ alias_method :pos, :tell
+
+ def initialize(url, size)
+ raise InvalidURLError unless ::Gitlab::UrlSanitizer.valid?(url)
+
+ @uri = URI(url)
+ @size = size
+ @tell = 0
+ end
+
+ def close
+ # no-op
+ end
+
+ def binmode
+ # no-op
+ end
+
+ def binmode?
+ true
+ end
+
+ def path
+ nil
+ end
+
+ def url
+ @uri.to_s
+ end
+
+ def seek(pos, where = IO::SEEK_SET)
+ new_pos =
+ case where
+ when IO::SEEK_END
+ size + pos
+ when IO::SEEK_SET
+ pos
+ when IO::SEEK_CUR
+ tell + pos
+ else
+ -1
+ end
+
+ raise 'new position is outside of file' if new_pos < 0 || new_pos > size
+
+ @tell = new_pos
+ end
+
+ def eof?
+ tell == size
+ end
+
+ def each_line
+ until eof?
+ line = readline
+ break if line.nil?
+
+ yield(line)
+ end
+ end
+
+ def read(length = nil)
+ out = ""
+
+ until eof? || (length && out.length >= length)
+ data = get_chunk
+ break if data.empty?
+
+ out << data
+ @tell += data.bytesize
+ end
+
+ out = out[0, length] if length && out.length > length
+
+ out
+ end
+
+ def readline
+ out = ""
+
+ until eof?
+ data = get_chunk
+ new_line = data.index("\n")
+
+ if !new_line.nil?
+ out << data[0..new_line]
+ @tell += new_line + 1
+ break
+ else
+ out << data
+ @tell += data.bytesize
+ end
+ end
+
+ out
+ end
+
+ def write(data)
+ raise NotImplementedError
+ end
+
+ def truncate(offset)
+ raise NotImplementedError
+ end
+
+ def flush
+ raise NotImplementedError
+ end
+
+ def present?
+ true
+ end
+
+ private
+
+ ##
+ # The below methods are not implemented in IO class
+ #
+ def in_range?
+ @chunk_range&.include?(tell)
+ end
+
+ def get_chunk
+ unless in_range?
+ response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') do |http|
+ http.request(request)
+ end
+
+ raise FailedToGetChunkError unless response.code == '200' || response.code == '206'
+
+ @chunk = response.body.force_encoding(Encoding::BINARY)
+ @chunk_range = response.content_range
+
+ ##
+ # Note: If provider does not return content_range, then we set it as we requested
+ # Provider: minio
+ # - When the file size is larger than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206
+ # - When the file size is smaller than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206
+ # Provider: AWS
+ # - When the file size is larger than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206
+ # - When the file size is smaller than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206
+ # Provider: GCS
+ # - When the file size is larger than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206
+ # - When the file size is smaller than requested Content-range, the Content-range is included in responces with Net::HTTPOK 200
+ @chunk_range ||= (chunk_start...(chunk_start + @chunk.length))
+ end
+
+ @chunk[chunk_offset..BUFFER_SIZE]
+ end
+
+ def request
+ Net::HTTP::Get.new(uri).tap do |request|
+ request.set_range(chunk_start, BUFFER_SIZE)
+ end
+ end
+
+ def chunk_offset
+ tell % BUFFER_SIZE
+ end
+
+ def chunk_start
+ (tell / BUFFER_SIZE) * BUFFER_SIZE
+ end
+
+ def chunk_end
+ [chunk_start + BUFFER_SIZE, size].min
+ end
+ end
+ end
+ end
+end
diff --git a/lib/gitlab/ci/trace/stream.rb b/lib/gitlab/ci/trace/stream.rb
index d52194f688b..b3fe3ef1c4d 100644
--- a/lib/gitlab/ci/trace/stream.rb
+++ b/lib/gitlab/ci/trace/stream.rb
@@ -8,7 +8,7 @@ module Gitlab
attr_reader :stream
- delegate :close, :tell, :seek, :size, :path, :truncate, to: :stream, allow_nil: true
+ delegate :close, :tell, :seek, :size, :path, :url, :truncate, to: :stream, allow_nil: true
delegate :valid?, to: :stream, as: :present?, allow_nil: true
diff --git a/lib/gitlab/verify/lfs_objects.rb b/lib/gitlab/verify/lfs_objects.rb
index fe51edbdeeb..970e2a7b718 100644
--- a/lib/gitlab/verify/lfs_objects.rb
+++ b/lib/gitlab/verify/lfs_objects.rb
@@ -12,7 +12,7 @@ module Gitlab
private
def relation
- LfsObject.all
+ LfsObject.with_files_stored_locally
end
def expected_checksum(lfs_object)
diff --git a/lib/gitlab/verify/uploads.rb b/lib/gitlab/verify/uploads.rb
index 6972e517ea5..0ffa71a6d72 100644
--- a/lib/gitlab/verify/uploads.rb
+++ b/lib/gitlab/verify/uploads.rb
@@ -12,7 +12,7 @@ module Gitlab
private
def relation
- Upload.all
+ Upload.with_files_stored_locally
end
def expected_checksum(upload)
diff --git a/lib/tasks/gitlab/artifacts/migrate.rake b/lib/tasks/gitlab/artifacts/migrate.rake
new file mode 100644
index 00000000000..bfca4bfb3f7
--- /dev/null
+++ b/lib/tasks/gitlab/artifacts/migrate.rake
@@ -0,0 +1,25 @@
+require 'logger'
+require 'resolv-replace'
+
+desc "GitLab | Migrate files for artifacts to comply with new storage format"
+namespace :gitlab do
+ namespace :artifacts do
+ task migrate: :environment do
+ logger = Logger.new(STDOUT)
+ logger.info('Starting transfer of artifacts')
+
+ Ci::Build.joins(:project)
+ .with_artifacts_stored_locally
+ .find_each(batch_size: 10) do |build|
+ begin
+ build.artifacts_file.migrate!(ObjectStorage::Store::REMOTE)
+ build.artifacts_metadata.migrate!(ObjectStorage::Store::REMOTE)
+
+ logger.info("Transferred artifacts of #{build.id} of #{build.artifacts_size} to object storage")
+ rescue => e
+ logger.error("Failed to transfer artifacts of #{build.id} with error: #{e.message}")
+ end
+ end
+ end
+ end
+end
diff --git a/lib/tasks/gitlab/lfs/migrate.rake b/lib/tasks/gitlab/lfs/migrate.rake
new file mode 100644
index 00000000000..a45e5ca91e0
--- /dev/null
+++ b/lib/tasks/gitlab/lfs/migrate.rake
@@ -0,0 +1,22 @@
+require 'logger'
+
+desc "GitLab | Migrate LFS objects to remote storage"
+namespace :gitlab do
+ namespace :lfs do
+ task migrate: :environment do
+ logger = Logger.new(STDOUT)
+ logger.info('Starting transfer of LFS files to object storage')
+
+ LfsObject.with_files_stored_locally
+ .find_each(batch_size: 10) do |lfs_object|
+ begin
+ lfs_object.file.migrate!(LfsObjectUploader::Store::REMOTE)
+
+ logger.info("Transferred LFS object #{lfs_object.oid} of size #{lfs_object.size.to_i.bytes} to object storage")
+ rescue => e
+ logger.error("Failed to transfer LFS object #{lfs_object.oid} with error: #{e.message}")
+ end
+ end
+ end
+ end
+end
diff --git a/lib/tasks/gitlab/uploads/migrate.rake b/lib/tasks/gitlab/uploads/migrate.rake
new file mode 100644
index 00000000000..c26c3ccb3be
--- /dev/null
+++ b/lib/tasks/gitlab/uploads/migrate.rake
@@ -0,0 +1,33 @@
+namespace :gitlab do
+ namespace :uploads do
+ desc 'GitLab | Uploads | Migrate the uploaded files to object storage'
+ task :migrate, [:uploader_class, :model_class, :mounted_as] => :environment do |task, args|
+ batch_size = ENV.fetch('BATCH', 200).to_i
+ @to_store = ObjectStorage::Store::REMOTE
+ @mounted_as = args.mounted_as&.gsub(':', '')&.to_sym
+ @uploader_class = args.uploader_class.constantize
+ @model_class = args.model_class.constantize
+
+ uploads.each_batch(of: batch_size, &method(:enqueue_batch)) # rubocop: disable Cop/InBatches
+ end
+
+ def enqueue_batch(batch, index)
+ job = ObjectStorage::MigrateUploadsWorker.enqueue!(batch,
+ @mounted_as,
+ @to_store)
+ puts "Enqueued job ##{index}: #{job}"
+ rescue ObjectStorage::MigrateUploadsWorker::SanityCheckError => e
+ # continue for the next batch
+ puts "Could not enqueue batch (#{batch.ids}) #{e.message}".color(:red)
+ end
+
+ def uploads
+ Upload.class_eval { include EachBatch } unless Upload < EachBatch
+
+ Upload
+ .where.not(store: @to_store)
+ .where(uploader: @uploader_class.to_s,
+ model_type: @model_class.base_class.sti_name)
+ end
+ end
+end