diff options
author | Sean McGivern <sean@mcgivern.me.uk> | 2018-03-27 08:20:03 +0000 |
---|---|---|
committer | Sean McGivern <sean@mcgivern.me.uk> | 2018-03-27 08:20:03 +0000 |
commit | 835fe4d324b5a89c09f06d74d5ddc73a2c41a38b (patch) | |
tree | 471a633221bf9d44bf4af94ff3137a94a2bf1407 /lib | |
parent | ab8f13c3ef6e07eb8d44805dc9eef4b008e1bbe9 (diff) | |
parent | 6d63a098f9a40f87ad922fa4c0ddcfb754e60023 (diff) | |
download | gitlab-ce-835fe4d324b5a89c09f06d74d5ddc73a2c41a38b.tar.gz |
Merge branch '40781-os-to-ce' into 'master'
Bring Object Storage to CE
Closes #4171, #4163, #3370, #2841, and #29203
See merge request gitlab-org/gitlab-ce!17358
Diffstat (limited to 'lib')
-rw-r--r-- | lib/api/helpers.rb | 16 | ||||
-rw-r--r-- | lib/api/job_artifacts.rb | 4 | ||||
-rw-r--r-- | lib/api/jobs.rb | 2 | ||||
-rw-r--r-- | lib/api/project_export.rb | 2 | ||||
-rw-r--r-- | lib/api/runner.rb | 3 | ||||
-rw-r--r-- | lib/api/v3/builds.rb | 6 | ||||
-rw-r--r-- | lib/gitlab/ci/trace/http_io.rb | 187 | ||||
-rw-r--r-- | lib/gitlab/ci/trace/stream.rb | 2 | ||||
-rw-r--r-- | lib/gitlab/verify/lfs_objects.rb | 2 | ||||
-rw-r--r-- | lib/gitlab/verify/uploads.rb | 2 | ||||
-rw-r--r-- | lib/tasks/gitlab/artifacts/migrate.rake | 25 | ||||
-rw-r--r-- | lib/tasks/gitlab/lfs/migrate.rake | 22 | ||||
-rw-r--r-- | lib/tasks/gitlab/uploads/migrate.rake | 33 |
13 files changed, 289 insertions, 17 deletions
diff --git a/lib/api/helpers.rb b/lib/api/helpers.rb index e4fca77ab5d..e59e8a45908 100644 --- a/lib/api/helpers.rb +++ b/lib/api/helpers.rb @@ -410,7 +410,7 @@ module API ) end - def present_file!(path, filename, content_type = 'application/octet-stream') + def present_disk_file!(path, filename, content_type = 'application/octet-stream') filename ||= File.basename(path) header['Content-Disposition'] = "attachment; filename=#{filename}" header['Content-Transfer-Encoding'] = 'binary' @@ -426,13 +426,17 @@ module API end end - def present_artifacts!(artifacts_file) - return not_found! unless artifacts_file.exists? + def present_carrierwave_file!(file, supports_direct_download: true) + return not_found! unless file.exists? - if artifacts_file.file_storage? - present_file!(artifacts_file.path, artifacts_file.filename) + if file.file_storage? + present_disk_file!(file.path, file.filename) + elsif supports_direct_download && file.class.direct_download_enabled? + redirect(file.url) else - redirect_to(artifacts_file.url) + header(*Gitlab::Workhorse.send_url(file.url)) + status :ok + body end end diff --git a/lib/api/job_artifacts.rb b/lib/api/job_artifacts.rb index 47e5eeab31d..b1adef49d46 100644 --- a/lib/api/job_artifacts.rb +++ b/lib/api/job_artifacts.rb @@ -28,7 +28,7 @@ module API builds = user_project.latest_successful_builds_for(params[:ref_name]) latest_build = builds.find_by!(name: params[:job]) - present_artifacts!(latest_build.artifacts_file) + present_carrierwave_file!(latest_build.artifacts_file) end desc 'Download the artifacts archive from a job' do @@ -43,7 +43,7 @@ module API build = find_build!(params[:job_id]) - present_artifacts!(build.artifacts_file) + present_carrierwave_file!(build.artifacts_file) end desc 'Download a specific file from artifacts archive' do diff --git a/lib/api/jobs.rb b/lib/api/jobs.rb index 9c205514b3a..60911c8d733 100644 --- a/lib/api/jobs.rb +++ b/lib/api/jobs.rb @@ -72,7 +72,7 @@ module API present build, with: Entities::Job end - # TODO: We should use `present_file!` and leave this implementation for backward compatibility (when build trace + # TODO: We should use `present_disk_file!` and leave this implementation for backward compatibility (when build trace # is saved in the DB instead of file). But before that, we need to consider how to replace the value of # `runners_token` with some mask (like `xxxxxx`) when sending trace file directly by workhorse. desc 'Get a trace of a specific job of a project' diff --git a/lib/api/project_export.rb b/lib/api/project_export.rb index b0a7fd6f4ab..efc4a33ae1b 100644 --- a/lib/api/project_export.rb +++ b/lib/api/project_export.rb @@ -25,7 +25,7 @@ module API render_api_error!('404 Not found or has expired', 404) unless path - present_file!(path, File.basename(path), 'application/gzip') + present_disk_file!(path, File.basename(path), 'application/gzip') end desc 'Start export' do diff --git a/lib/api/runner.rb b/lib/api/runner.rb index 7e6c33ec33d..8da97a97754 100644 --- a/lib/api/runner.rb +++ b/lib/api/runner.rb @@ -244,11 +244,12 @@ module API params do requires :id, type: Integer, desc: %q(Job's ID) optional :token, type: String, desc: %q(Job's authentication token) + optional :direct_download, default: false, type: Boolean, desc: %q(Perform direct download from remote storage instead of proxying artifacts) end get '/:id/artifacts' do job = authenticate_job! - present_artifacts!(job.artifacts_file) + present_carrierwave_file!(job.artifacts_file, supports_direct_download: params[:direct_download]) end end end diff --git a/lib/api/v3/builds.rb b/lib/api/v3/builds.rb index ac76fece931..683b9c993cb 100644 --- a/lib/api/v3/builds.rb +++ b/lib/api/v3/builds.rb @@ -85,7 +85,7 @@ module API build = get_build!(params[:build_id]) - present_artifacts!(build.artifacts_file) + present_carrierwave_file!(build.artifacts_file) end desc 'Download the artifacts file from build' do @@ -102,10 +102,10 @@ module API builds = user_project.latest_successful_builds_for(params[:ref_name]) latest_build = builds.find_by!(name: params[:job]) - present_artifacts!(latest_build.artifacts_file) + present_carrierwave_file!(latest_build.artifacts_file) end - # TODO: We should use `present_file!` and leave this implementation for backward compatibility (when build trace + # TODO: We should use `present_disk_file!` and leave this implementation for backward compatibility (when build trace # is saved in the DB instead of file). But before that, we need to consider how to replace the value of # `runners_token` with some mask (like `xxxxxx`) when sending trace file directly by workhorse. desc 'Get a trace of a specific build of a project' diff --git a/lib/gitlab/ci/trace/http_io.rb b/lib/gitlab/ci/trace/http_io.rb new file mode 100644 index 00000000000..ac4308f4e2c --- /dev/null +++ b/lib/gitlab/ci/trace/http_io.rb @@ -0,0 +1,187 @@ +## +# This class is compatible with IO class (https://ruby-doc.org/core-2.3.1/IO.html) +# source: https://gitlab.com/snippets/1685610 +module Gitlab + module Ci + class Trace + class HttpIO + BUFFER_SIZE = 128.kilobytes + + InvalidURLError = Class.new(StandardError) + FailedToGetChunkError = Class.new(StandardError) + + attr_reader :uri, :size + attr_reader :tell + attr_reader :chunk, :chunk_range + + alias_method :pos, :tell + + def initialize(url, size) + raise InvalidURLError unless ::Gitlab::UrlSanitizer.valid?(url) + + @uri = URI(url) + @size = size + @tell = 0 + end + + def close + # no-op + end + + def binmode + # no-op + end + + def binmode? + true + end + + def path + nil + end + + def url + @uri.to_s + end + + def seek(pos, where = IO::SEEK_SET) + new_pos = + case where + when IO::SEEK_END + size + pos + when IO::SEEK_SET + pos + when IO::SEEK_CUR + tell + pos + else + -1 + end + + raise 'new position is outside of file' if new_pos < 0 || new_pos > size + + @tell = new_pos + end + + def eof? + tell == size + end + + def each_line + until eof? + line = readline + break if line.nil? + + yield(line) + end + end + + def read(length = nil) + out = "" + + until eof? || (length && out.length >= length) + data = get_chunk + break if data.empty? + + out << data + @tell += data.bytesize + end + + out = out[0, length] if length && out.length > length + + out + end + + def readline + out = "" + + until eof? + data = get_chunk + new_line = data.index("\n") + + if !new_line.nil? + out << data[0..new_line] + @tell += new_line + 1 + break + else + out << data + @tell += data.bytesize + end + end + + out + end + + def write(data) + raise NotImplementedError + end + + def truncate(offset) + raise NotImplementedError + end + + def flush + raise NotImplementedError + end + + def present? + true + end + + private + + ## + # The below methods are not implemented in IO class + # + def in_range? + @chunk_range&.include?(tell) + end + + def get_chunk + unless in_range? + response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https') do |http| + http.request(request) + end + + raise FailedToGetChunkError unless response.code == '200' || response.code == '206' + + @chunk = response.body.force_encoding(Encoding::BINARY) + @chunk_range = response.content_range + + ## + # Note: If provider does not return content_range, then we set it as we requested + # Provider: minio + # - When the file size is larger than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206 + # - When the file size is smaller than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206 + # Provider: AWS + # - When the file size is larger than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206 + # - When the file size is smaller than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206 + # Provider: GCS + # - When the file size is larger than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206 + # - When the file size is smaller than requested Content-range, the Content-range is included in responces with Net::HTTPOK 200 + @chunk_range ||= (chunk_start...(chunk_start + @chunk.length)) + end + + @chunk[chunk_offset..BUFFER_SIZE] + end + + def request + Net::HTTP::Get.new(uri).tap do |request| + request.set_range(chunk_start, BUFFER_SIZE) + end + end + + def chunk_offset + tell % BUFFER_SIZE + end + + def chunk_start + (tell / BUFFER_SIZE) * BUFFER_SIZE + end + + def chunk_end + [chunk_start + BUFFER_SIZE, size].min + end + end + end + end +end diff --git a/lib/gitlab/ci/trace/stream.rb b/lib/gitlab/ci/trace/stream.rb index d52194f688b..b3fe3ef1c4d 100644 --- a/lib/gitlab/ci/trace/stream.rb +++ b/lib/gitlab/ci/trace/stream.rb @@ -8,7 +8,7 @@ module Gitlab attr_reader :stream - delegate :close, :tell, :seek, :size, :path, :truncate, to: :stream, allow_nil: true + delegate :close, :tell, :seek, :size, :path, :url, :truncate, to: :stream, allow_nil: true delegate :valid?, to: :stream, as: :present?, allow_nil: true diff --git a/lib/gitlab/verify/lfs_objects.rb b/lib/gitlab/verify/lfs_objects.rb index fe51edbdeeb..970e2a7b718 100644 --- a/lib/gitlab/verify/lfs_objects.rb +++ b/lib/gitlab/verify/lfs_objects.rb @@ -12,7 +12,7 @@ module Gitlab private def relation - LfsObject.all + LfsObject.with_files_stored_locally end def expected_checksum(lfs_object) diff --git a/lib/gitlab/verify/uploads.rb b/lib/gitlab/verify/uploads.rb index 6972e517ea5..0ffa71a6d72 100644 --- a/lib/gitlab/verify/uploads.rb +++ b/lib/gitlab/verify/uploads.rb @@ -12,7 +12,7 @@ module Gitlab private def relation - Upload.all + Upload.with_files_stored_locally end def expected_checksum(upload) diff --git a/lib/tasks/gitlab/artifacts/migrate.rake b/lib/tasks/gitlab/artifacts/migrate.rake new file mode 100644 index 00000000000..bfca4bfb3f7 --- /dev/null +++ b/lib/tasks/gitlab/artifacts/migrate.rake @@ -0,0 +1,25 @@ +require 'logger' +require 'resolv-replace' + +desc "GitLab | Migrate files for artifacts to comply with new storage format" +namespace :gitlab do + namespace :artifacts do + task migrate: :environment do + logger = Logger.new(STDOUT) + logger.info('Starting transfer of artifacts') + + Ci::Build.joins(:project) + .with_artifacts_stored_locally + .find_each(batch_size: 10) do |build| + begin + build.artifacts_file.migrate!(ObjectStorage::Store::REMOTE) + build.artifacts_metadata.migrate!(ObjectStorage::Store::REMOTE) + + logger.info("Transferred artifacts of #{build.id} of #{build.artifacts_size} to object storage") + rescue => e + logger.error("Failed to transfer artifacts of #{build.id} with error: #{e.message}") + end + end + end + end +end diff --git a/lib/tasks/gitlab/lfs/migrate.rake b/lib/tasks/gitlab/lfs/migrate.rake new file mode 100644 index 00000000000..a45e5ca91e0 --- /dev/null +++ b/lib/tasks/gitlab/lfs/migrate.rake @@ -0,0 +1,22 @@ +require 'logger' + +desc "GitLab | Migrate LFS objects to remote storage" +namespace :gitlab do + namespace :lfs do + task migrate: :environment do + logger = Logger.new(STDOUT) + logger.info('Starting transfer of LFS files to object storage') + + LfsObject.with_files_stored_locally + .find_each(batch_size: 10) do |lfs_object| + begin + lfs_object.file.migrate!(LfsObjectUploader::Store::REMOTE) + + logger.info("Transferred LFS object #{lfs_object.oid} of size #{lfs_object.size.to_i.bytes} to object storage") + rescue => e + logger.error("Failed to transfer LFS object #{lfs_object.oid} with error: #{e.message}") + end + end + end + end +end diff --git a/lib/tasks/gitlab/uploads/migrate.rake b/lib/tasks/gitlab/uploads/migrate.rake new file mode 100644 index 00000000000..c26c3ccb3be --- /dev/null +++ b/lib/tasks/gitlab/uploads/migrate.rake @@ -0,0 +1,33 @@ +namespace :gitlab do + namespace :uploads do + desc 'GitLab | Uploads | Migrate the uploaded files to object storage' + task :migrate, [:uploader_class, :model_class, :mounted_as] => :environment do |task, args| + batch_size = ENV.fetch('BATCH', 200).to_i + @to_store = ObjectStorage::Store::REMOTE + @mounted_as = args.mounted_as&.gsub(':', '')&.to_sym + @uploader_class = args.uploader_class.constantize + @model_class = args.model_class.constantize + + uploads.each_batch(of: batch_size, &method(:enqueue_batch)) # rubocop: disable Cop/InBatches + end + + def enqueue_batch(batch, index) + job = ObjectStorage::MigrateUploadsWorker.enqueue!(batch, + @mounted_as, + @to_store) + puts "Enqueued job ##{index}: #{job}" + rescue ObjectStorage::MigrateUploadsWorker::SanityCheckError => e + # continue for the next batch + puts "Could not enqueue batch (#{batch.ids}) #{e.message}".color(:red) + end + + def uploads + Upload.class_eval { include EachBatch } unless Upload < EachBatch + + Upload + .where.not(store: @to_store) + .where(uploader: @uploader_class.to_s, + model_type: @model_class.base_class.sti_name) + end + end +end |