summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKamil Trzciński <ayufan@ayufan.eu>2018-07-09 13:34:18 +0200
committerKamil Trzciński <ayufan@ayufan.eu>2018-07-09 14:19:52 +0200
commitbc00803af03147452c12e9e2c7e8f0c0cba86f73 (patch)
tree5d360cbb1422b7d063bffa4fe40bcf1e64b61db7
parent67157de1e4cc482b5321ba2f246bfd80a7893f93 (diff)
downloadgitlab-ce-bc00803af03147452c12e9e2c7e8f0c0cba86f73.tar.gz
Access metadata directly from Object Storage
Previously we would pull the file, now, we just stream-it as needed from Object Storage
-rw-r--r--app/models/ci/build.rb4
-rw-r--r--app/uploaders/gitlab_uploader.rb17
-rw-r--r--app/uploaders/job_artifact_uploader.rb8
-rw-r--r--changelogs/unreleased/improve-metadata-access-performance.yml5
-rw-r--r--lib/gitlab/ci/build/artifacts/metadata.rb19
-rw-r--r--lib/gitlab/ci/trace/http_io.rb197
-rw-r--r--lib/gitlab/http_io.rb193
-rw-r--r--spec/controllers/projects/jobs_controller_spec.rb2
-rw-r--r--spec/lib/gitlab/ci/build/artifacts/metadata_spec.rb24
-rw-r--r--spec/lib/gitlab/http_io_spec.rb (renamed from spec/lib/gitlab/ci/trace/http_io_spec.rb)2
-rw-r--r--spec/support/http_io/http_io_helpers.rb4
-rw-r--r--spec/uploaders/job_artifact_uploader_spec.rb2
12 files changed, 258 insertions, 219 deletions
diff --git a/app/models/ci/build.rb b/app/models/ci/build.rb
index 19949f83351..acf555a5369 100644
--- a/app/models/ci/build.rb
+++ b/app/models/ci/build.rb
@@ -437,9 +437,9 @@ module Ci
end
def artifacts_metadata_entry(path, **options)
- artifacts_metadata.use_file do |metadata_path|
+ artifacts_metadata.open do |metadata_stream|
metadata = Gitlab::Ci::Build::Artifacts::Metadata.new(
- metadata_path,
+ metadata_stream,
path,
**options)
diff --git a/app/uploaders/gitlab_uploader.rb b/app/uploaders/gitlab_uploader.rb
index 7919f126075..7636acf255c 100644
--- a/app/uploaders/gitlab_uploader.rb
+++ b/app/uploaders/gitlab_uploader.rb
@@ -71,6 +71,23 @@ class GitlabUploader < CarrierWave::Uploader::Base
File.join('/', self.class.base_dir, dynamic_segment, filename)
end
+ def open
+ stream = if file_storage?
+ File.open(path, "rb") if path
+ else
+ ::Gitlab::HttpIO.new(url, cached_size) if url
+ end
+
+ return unless stream
+ return stream unless block_given?
+
+ begin
+ yield(stream)
+ ensure
+ stream.close
+ end
+ end
+
private
# Designed to be overridden by child uploaders that have a dynamic path
diff --git a/app/uploaders/job_artifact_uploader.rb b/app/uploaders/job_artifact_uploader.rb
index 855cf2fc21c..f6af023e0f9 100644
--- a/app/uploaders/job_artifact_uploader.rb
+++ b/app/uploaders/job_artifact_uploader.rb
@@ -18,14 +18,6 @@ class JobArtifactUploader < GitlabUploader
dynamic_segment
end
- def open
- if file_storage?
- File.open(path, "rb") if path
- else
- ::Gitlab::Ci::Trace::HttpIO.new(url, cached_size) if url
- end
- end
-
private
def dynamic_segment
diff --git a/changelogs/unreleased/improve-metadata-access-performance.yml b/changelogs/unreleased/improve-metadata-access-performance.yml
new file mode 100644
index 00000000000..b16fa99dd3b
--- /dev/null
+++ b/changelogs/unreleased/improve-metadata-access-performance.yml
@@ -0,0 +1,5 @@
+---
+title: Access metadata directly from Object Storage
+merge_request:
+author:
+type: performance
diff --git a/lib/gitlab/ci/build/artifacts/metadata.rb b/lib/gitlab/ci/build/artifacts/metadata.rb
index 0bbd60d8ffe..375d8bc1ff5 100644
--- a/lib/gitlab/ci/build/artifacts/metadata.rb
+++ b/lib/gitlab/ci/build/artifacts/metadata.rb
@@ -7,14 +7,15 @@ module Gitlab
module Artifacts
class Metadata
ParserError = Class.new(StandardError)
+ InvalidStreamError = Class.new(StandardError)
VERSION_PATTERN = /^[\w\s]+(\d+\.\d+\.\d+)/
INVALID_PATH_PATTERN = %r{(^\.?\.?/)|(/\.?\.?/)}
- attr_reader :file, :path, :full_version
+ attr_reader :stream, :path, :full_version
- def initialize(file, path, **opts)
- @file, @path, @opts = file, path, opts
+ def initialize(stream, path, **opts)
+ @stream, @path, @opts = stream, path, opts
@full_version = read_version
end
@@ -103,7 +104,17 @@ module Gitlab
end
def gzip(&block)
- Zlib::GzipReader.open(@file, &block)
+ raise InvalidStreamError, "Invalid stream" unless @stream
+
+ # restart gzip reading
+ @stream.seek(0)
+
+ gz = Zlib::GzipReader.new(@stream)
+ yield(gz)
+ rescue Zlib::Error => e
+ raise InvalidStreamError, e.message
+ ensure
+ gz&.finish
end
end
end
diff --git a/lib/gitlab/ci/trace/http_io.rb b/lib/gitlab/ci/trace/http_io.rb
deleted file mode 100644
index 8788af57a67..00000000000
--- a/lib/gitlab/ci/trace/http_io.rb
+++ /dev/null
@@ -1,197 +0,0 @@
-##
-# This class is compatible with IO class (https://ruby-doc.org/core-2.3.1/IO.html)
-# source: https://gitlab.com/snippets/1685610
-module Gitlab
- module Ci
- class Trace
- class HttpIO
- BUFFER_SIZE = 128.kilobytes
-
- InvalidURLError = Class.new(StandardError)
- FailedToGetChunkError = Class.new(StandardError)
-
- attr_reader :uri, :size
- attr_reader :tell
- attr_reader :chunk, :chunk_range
-
- alias_method :pos, :tell
-
- def initialize(url, size)
- raise InvalidURLError unless ::Gitlab::UrlSanitizer.valid?(url)
-
- @uri = URI(url)
- @size = size
- @tell = 0
- end
-
- def close
- # no-op
- end
-
- def binmode
- # no-op
- end
-
- def binmode?
- true
- end
-
- def path
- nil
- end
-
- def url
- @uri.to_s
- end
-
- def seek(pos, where = IO::SEEK_SET)
- new_pos =
- case where
- when IO::SEEK_END
- size + pos
- when IO::SEEK_SET
- pos
- when IO::SEEK_CUR
- tell + pos
- else
- -1
- end
-
- raise 'new position is outside of file' if new_pos < 0 || new_pos > size
-
- @tell = new_pos
- end
-
- def eof?
- tell == size
- end
-
- def each_line
- until eof?
- line = readline
- break if line.nil?
-
- yield(line)
- end
- end
-
- def read(length = nil, outbuf = "")
- out = ""
-
- length ||= size - tell
-
- until length <= 0 || eof?
- data = get_chunk
- break if data.empty?
-
- chunk_bytes = [BUFFER_SIZE - chunk_offset, length].min
- chunk_data = data.byteslice(0, chunk_bytes)
-
- out << chunk_data
- @tell += chunk_data.bytesize
- length -= chunk_data.bytesize
- end
-
- # If outbuf is passed, we put the output into the buffer. This supports IO.copy_stream functionality
- if outbuf
- outbuf.slice!(0, outbuf.bytesize)
- outbuf << out
- end
-
- out
- end
-
- def readline
- out = ""
-
- until eof?
- data = get_chunk
- new_line = data.index("\n")
-
- if !new_line.nil?
- out << data[0..new_line]
- @tell += new_line + 1
- break
- else
- out << data
- @tell += data.bytesize
- end
- end
-
- out
- end
-
- def write(data)
- raise NotImplementedError
- end
-
- def truncate(offset)
- raise NotImplementedError
- end
-
- def flush
- raise NotImplementedError
- end
-
- def present?
- true
- end
-
- private
-
- ##
- # The below methods are not implemented in IO class
- #
- def in_range?
- @chunk_range&.include?(tell)
- end
-
- def get_chunk
- unless in_range?
- response = Net::HTTP.start(uri.hostname, uri.port, proxy_from_env: true, use_ssl: uri.scheme == 'https') do |http|
- http.request(request)
- end
-
- raise FailedToGetChunkError unless response.code == '200' || response.code == '206'
-
- @chunk = response.body.force_encoding(Encoding::BINARY)
- @chunk_range = response.content_range
-
- ##
- # Note: If provider does not return content_range, then we set it as we requested
- # Provider: minio
- # - When the file size is larger than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206
- # - When the file size is smaller than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206
- # Provider: AWS
- # - When the file size is larger than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206
- # - When the file size is smaller than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206
- # Provider: GCS
- # - When the file size is larger than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206
- # - When the file size is smaller than requested Content-range, the Content-range is included in responces with Net::HTTPOK 200
- @chunk_range ||= (chunk_start...(chunk_start + @chunk.bytesize))
- end
-
- @chunk[chunk_offset..BUFFER_SIZE]
- end
-
- def request
- Net::HTTP::Get.new(uri).tap do |request|
- request.set_range(chunk_start, BUFFER_SIZE)
- end
- end
-
- def chunk_offset
- tell % BUFFER_SIZE
- end
-
- def chunk_start
- (tell / BUFFER_SIZE) * BUFFER_SIZE
- end
-
- def chunk_end
- [chunk_start + BUFFER_SIZE, size].min
- end
- end
- end
- end
-end
diff --git a/lib/gitlab/http_io.rb b/lib/gitlab/http_io.rb
new file mode 100644
index 00000000000..ce24817db54
--- /dev/null
+++ b/lib/gitlab/http_io.rb
@@ -0,0 +1,193 @@
+##
+# This class is compatible with IO class (https://ruby-doc.org/core-2.3.1/IO.html)
+# source: https://gitlab.com/snippets/1685610
+module Gitlab
+ class HttpIO
+ BUFFER_SIZE = 128.kilobytes
+
+ InvalidURLError = Class.new(StandardError)
+ FailedToGetChunkError = Class.new(StandardError)
+
+ attr_reader :uri, :size
+ attr_reader :tell
+ attr_reader :chunk, :chunk_range
+
+ alias_method :pos, :tell
+
+ def initialize(url, size)
+ raise InvalidURLError unless ::Gitlab::UrlSanitizer.valid?(url)
+
+ @uri = URI(url)
+ @size = size
+ @tell = 0
+ end
+
+ def close
+ # no-op
+ end
+
+ def binmode
+ # no-op
+ end
+
+ def binmode?
+ true
+ end
+
+ def path
+ nil
+ end
+
+ def url
+ @uri.to_s
+ end
+
+ def seek(pos, where = IO::SEEK_SET)
+ new_pos =
+ case where
+ when IO::SEEK_END
+ size + pos
+ when IO::SEEK_SET
+ pos
+ when IO::SEEK_CUR
+ tell + pos
+ else
+ -1
+ end
+
+ raise 'new position is outside of file' if new_pos < 0 || new_pos > size
+
+ @tell = new_pos
+ end
+
+ def eof?
+ tell == size
+ end
+
+ def each_line
+ until eof?
+ line = readline
+ break if line.nil?
+
+ yield(line)
+ end
+ end
+
+ def read(length = nil, outbuf = "")
+ out = ""
+
+ length ||= size - tell
+
+ until length <= 0 || eof?
+ data = get_chunk
+ break if data.empty?
+
+ chunk_bytes = [BUFFER_SIZE - chunk_offset, length].min
+ chunk_data = data.byteslice(0, chunk_bytes)
+
+ out << chunk_data
+ @tell += chunk_data.bytesize
+ length -= chunk_data.bytesize
+ end
+
+ # If outbuf is passed, we put the output into the buffer. This supports IO.copy_stream functionality
+ if outbuf
+ outbuf.slice!(0, outbuf.bytesize)
+ outbuf << out
+ end
+
+ out
+ end
+
+ def readline
+ out = ""
+
+ until eof?
+ data = get_chunk
+ new_line = data.index("\n")
+
+ if !new_line.nil?
+ out << data[0..new_line]
+ @tell += new_line + 1
+ break
+ else
+ out << data
+ @tell += data.bytesize
+ end
+ end
+
+ out
+ end
+
+ def write(data)
+ raise NotImplementedError
+ end
+
+ def truncate(offset)
+ raise NotImplementedError
+ end
+
+ def flush
+ raise NotImplementedError
+ end
+
+ def present?
+ true
+ end
+
+ private
+
+ ##
+ # The below methods are not implemented in IO class
+ #
+ def in_range?
+ @chunk_range&.include?(tell)
+ end
+
+ def get_chunk
+ unless in_range?
+ response = Net::HTTP.start(uri.hostname, uri.port, proxy_from_env: true, use_ssl: uri.scheme == 'https') do |http|
+ http.request(request)
+ end
+
+ raise FailedToGetChunkError unless response.code == '200' || response.code == '206'
+
+ @chunk = response.body.force_encoding(Encoding::BINARY)
+ @chunk_range = response.content_range
+
+ ##
+ # Note: If provider does not return content_range, then we set it as we requested
+ # Provider: minio
+ # - When the file size is larger than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206
+ # - When the file size is smaller than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206
+ # Provider: AWS
+ # - When the file size is larger than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206
+ # - When the file size is smaller than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206
+ # Provider: GCS
+ # - When the file size is larger than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206
+ # - When the file size is smaller than requested Content-range, the Content-range is included in responces with Net::HTTPOK 200
+ @chunk_range ||= (chunk_start...(chunk_start + @chunk.bytesize))
+ end
+
+ @chunk[chunk_offset..BUFFER_SIZE]
+ end
+
+ def request
+ Net::HTTP::Get.new(uri).tap do |request|
+ request.set_range(chunk_start, BUFFER_SIZE)
+ end
+ end
+
+ def chunk_offset
+ tell % BUFFER_SIZE
+ end
+
+ def chunk_start
+ (tell / BUFFER_SIZE) * BUFFER_SIZE
+ end
+
+ def chunk_end
+ [chunk_start + BUFFER_SIZE, size].min
+ end
+ end
+end
diff --git a/spec/controllers/projects/jobs_controller_spec.rb b/spec/controllers/projects/jobs_controller_spec.rb
index e6332a10265..5f53a2d087a 100644
--- a/spec/controllers/projects/jobs_controller_spec.rb
+++ b/spec/controllers/projects/jobs_controller_spec.rb
@@ -245,7 +245,7 @@ describe Projects::JobsController, :clean_gitlab_redis_shared_state do
end
it 'returns a trace' do
- expect { get_trace }.to raise_error(Gitlab::Ci::Trace::HttpIO::FailedToGetChunkError)
+ expect { get_trace }.to raise_error(Gitlab::HttpIO::FailedToGetChunkError)
end
end
end
diff --git a/spec/lib/gitlab/ci/build/artifacts/metadata_spec.rb b/spec/lib/gitlab/ci/build/artifacts/metadata_spec.rb
index 6a52ae01b2f..e327399d82d 100644
--- a/spec/lib/gitlab/ci/build/artifacts/metadata_spec.rb
+++ b/spec/lib/gitlab/ci/build/artifacts/metadata_spec.rb
@@ -2,13 +2,21 @@ require 'spec_helper'
describe Gitlab::Ci::Build::Artifacts::Metadata do
def metadata(path = '', **opts)
- described_class.new(metadata_file_path, path, **opts)
+ described_class.new(metadata_file_stream, path, **opts)
end
let(:metadata_file_path) do
Rails.root + 'spec/fixtures/ci_build_artifacts_metadata.gz'
end
+ let(:metadata_file_stream) do
+ File.open(metadata_file_path) if metadata_file_path
+ end
+
+ after do
+ metadata_file_stream&.close
+ end
+
context 'metadata file exists' do
describe '#find_entries! empty string' do
subject { metadata('').find_entries! }
@@ -86,11 +94,21 @@ describe Gitlab::Ci::Build::Artifacts::Metadata do
end
context 'metadata file does not exist' do
- let(:metadata_file_path) { '' }
+ let(:metadata_file_path) { nil }
+
+ describe '#find_entries!' do
+ it 'raises error' do
+ expect { metadata.find_entries! }.to raise_error(described_class::InvalidStreamError, /Invalid stream/)
+ end
+ end
+ end
+
+ context 'metadata file is invalid' do
+ let(:metadata_file_path) { Rails.root + 'spec/fixtures/ci_build_artifacts.zip' }
describe '#find_entries!' do
it 'raises error' do
- expect { metadata.find_entries! }.to raise_error(Errno::ENOENT)
+ expect { metadata.find_entries! }.to raise_error(described_class::InvalidStreamError, /not in gzip format/)
end
end
end
diff --git a/spec/lib/gitlab/ci/trace/http_io_spec.rb b/spec/lib/gitlab/http_io_spec.rb
index 5474e2f518c..2c9a73609a9 100644
--- a/spec/lib/gitlab/ci/trace/http_io_spec.rb
+++ b/spec/lib/gitlab/http_io_spec.rb
@@ -1,6 +1,6 @@
require 'spec_helper'
-describe Gitlab::Ci::Trace::HttpIO do
+describe Gitlab::HttpIO do
include HttpIOHelpers
let(:http_io) { described_class.new(url, size) }
diff --git a/spec/support/http_io/http_io_helpers.rb b/spec/support/http_io/http_io_helpers.rb
index 2c68c2cd9a6..86b254aa67d 100644
--- a/spec/support/http_io/http_io_helpers.rb
+++ b/spec/support/http_io/http_io_helpers.rb
@@ -54,12 +54,12 @@ module HttpIOHelpers
def set_smaller_buffer_size_than(file_size)
blocks = (file_size / 128)
new_size = (blocks / 2) * 128
- stub_const("Gitlab::Ci::Trace::HttpIO::BUFFER_SIZE", new_size)
+ stub_const("Gitlab::HttpIO::BUFFER_SIZE", new_size)
end
def set_larger_buffer_size_than(file_size)
blocks = (file_size / 128)
new_size = (blocks * 2) * 128
- stub_const("Gitlab::Ci::Trace::HttpIO::BUFFER_SIZE", new_size)
+ stub_const("Gitlab::HttpIO::BUFFER_SIZE", new_size)
end
end
diff --git a/spec/uploaders/job_artifact_uploader_spec.rb b/spec/uploaders/job_artifact_uploader_spec.rb
index 026e4356ed6..d0b14768867 100644
--- a/spec/uploaders/job_artifact_uploader_spec.rb
+++ b/spec/uploaders/job_artifact_uploader_spec.rb
@@ -55,7 +55,7 @@ describe JobArtifactUploader do
end
it 'returns http io stream' do
- is_expected.to be_a(Gitlab::Ci::Trace::HttpIO)
+ is_expected.to be_a(Gitlab::HttpIO)
end
end
end