path: root/lib
diff options
author Kamil Trzciński <> 2018-07-09 13:34:18 +0200
committer Kamil Trzciński <> 2018-07-09 14:19:52 +0200
commit bc00803af03147452c12e9e2c7e8f0c0cba86f73 (patch)
tree 5d360cbb1422b7d063bffa4fe40bcf1e64b61db7 /lib
parent 67157de1e4cc482b5321ba2f246bfd80a7893f93 (diff)
Access metadata directly from Object Storage
Previously we would pull the file, now, we just stream-it as needed from Object Storage
Diffstat (limited to 'lib')
3 files changed, 208 insertions, 201 deletions
diff --git a/lib/gitlab/ci/build/artifacts/metadata.rb b/lib/gitlab/ci/build/artifacts/metadata.rb
index 0bbd60d8ffe..375d8bc1ff5 100644
--- a/lib/gitlab/ci/build/artifacts/metadata.rb
+++ b/lib/gitlab/ci/build/artifacts/metadata.rb
@@ -7,14 +7,15 @@ module Gitlab
module Artifacts
class Metadata
ParserError = Class.new(StandardError)
+ InvalidStreamError = Class.new(StandardError)
VERSION_PATTERN = /^[\w\s]+(\d+\.\d+\.\d+)/
INVALID_PATH_PATTERN = %r{(^\.?\.?/)|(/\.?\.?/)}
- attr_reader :file, :path, :full_version
+ attr_reader :stream, :path, :full_version
- def initialize(file, path, **opts)
- @file, @path, @opts = file, path, opts
+ def initialize(stream, path, **opts)
+ @stream, @path, @opts = stream, path, opts
@full_version = read_version
@@ -103,7 +104,17 @@ module Gitlab
def gzip(&block)
- Zlib::GzipReader.open(@file, &block)
+ raise InvalidStreamError, "Invalid stream" unless @stream
+ # restart gzip reading
+ @stream.seek(0)
+ gz = Zlib::GzipReader.new(@stream)
+ yield(gz)
+ rescue Zlib::Error => e
+ raise InvalidStreamError, e.message
+ ensure
+ gz&.finish
diff --git a/lib/gitlab/ci/trace/http_io.rb b/lib/gitlab/ci/trace/http_io.rb
deleted file mode 100644
index 8788af57a67..00000000000
--- a/lib/gitlab/ci/trace/http_io.rb
+++ /dev/null
@@ -1,197 +0,0 @@
-# This class is compatible with IO class (
-# source:
-module Gitlab
- module Ci
- class Trace
- class HttpIO
- BUFFER_SIZE = 128.kilobytes
- InvalidURLError = Class.new(StandardError)
- FailedToGetChunkError = Class.new(StandardError)
- attr_reader :uri, :size
- attr_reader :tell
- attr_reader :chunk, :chunk_range
- alias_method :pos, :tell
- def initialize(url, size)
- raise InvalidURLError unless ::Gitlab::UrlSanitizer.valid?(url)
- @uri = URI(url)
- @size = size
- @tell = 0
- end
- def close
- # no-op
- end
- def binmode
- # no-op
- end
- def binmode?
- true
- end
- def path
- nil
- end
- def url
- @uri.to_s
- end
- def seek(pos, where = IO::SEEK_SET)
- new_pos =
- case where
- when IO::SEEK_END
- size + pos
- when IO::SEEK_SET
- pos
- when IO::SEEK_CUR
- tell + pos
- else
- -1
- end
- raise 'new position is outside of file' if new_pos < 0 || new_pos > size
- @tell = new_pos
- end
- def eof?
- tell == size
- end
- def each_line
- until eof?
- line = readline
- break if line.nil?
- yield(line)
- end
- end
- def read(length = nil, outbuf = "")
- out = ""
- length ||= size - tell
- until length <= 0 || eof?
- data = get_chunk
- break if data.empty?
- chunk_bytes = [BUFFER_SIZE - chunk_offset, length].min
- chunk_data = data.byteslice(0, chunk_bytes)
- out << chunk_data
- @tell += chunk_data.bytesize
- length -= chunk_data.bytesize
- end
- # If outbuf is passed, we put the output into the buffer. This supports IO.copy_stream functionality
- if outbuf
- outbuf.slice!(0, outbuf.bytesize)
- outbuf << out
- end
- out
- end
- def readline
- out = ""
- until eof?
- data = get_chunk
- new_line = data.index("\n")
- if !new_line.nil?
- out << data[0..new_line]
- @tell += new_line + 1
- break
- else
- out << data
- @tell += data.bytesize
- end
- end
- out
- end
- def write(data)
- raise NotImplementedError
- end
- def truncate(offset)
- raise NotImplementedError
- end
- def flush
- raise NotImplementedError
- end
- def present?
- true
- end
- private
- ##
- # The below methods are not implemented in IO class
- #
- def in_range?
- @chunk_range&.include?(tell)
- end
- def get_chunk
- unless in_range?
- response = Net::HTTP.start(uri.hostname, uri.port, proxy_from_env: true, use_ssl: uri.scheme == 'https') do |http|
- http.request(request)
- end
- raise FailedToGetChunkError unless response.code == '200' || response.code == '206'
- @chunk = response.body.force_encoding(Encoding::BINARY)
- @chunk_range = response.content_range
- ##
- # Note: If provider does not return content_range, then we set it as we requested
- # Provider: minio
- # - When the file size is larger than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206
- # - When the file size is smaller than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206
- # Provider: AWS
- # - When the file size is larger than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206
- # - When the file size is smaller than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206
- # Provider: GCS
- # - When the file size is larger than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206
- # - When the file size is smaller than requested Content-range, the Content-range is included in responces with Net::HTTPOK 200
- @chunk_range ||= (chunk_start...(chunk_start + @chunk.bytesize))
- end
- @chunk[chunk_offset..BUFFER_SIZE]
- end
- def request
- Net::HTTP::Get.new(uri).tap do |request|
- request.set_range(chunk_start, BUFFER_SIZE)
- end
- end
- def chunk_offset
- tell % BUFFER_SIZE
- end
- def chunk_start
- (tell / BUFFER_SIZE) * BUFFER_SIZE
- end
- def chunk_end
- [chunk_start + BUFFER_SIZE, size].min
- end
- end
- end
- end
diff --git a/lib/gitlab/http_io.rb b/lib/gitlab/http_io.rb
new file mode 100644
index 00000000000..ce24817db54
--- /dev/null
+++ b/lib/gitlab/http_io.rb
@@ -0,0 +1,193 @@
+# This class is compatible with IO class (
+# source:
+module Gitlab
+ class HttpIO
+ BUFFER_SIZE = 128.kilobytes
+ InvalidURLError = Class.new(StandardError)
+ FailedToGetChunkError = Class.new(StandardError)
+ attr_reader :uri, :size
+ attr_reader :tell
+ attr_reader :chunk, :chunk_range
+ alias_method :pos, :tell
+ def initialize(url, size)
+ raise InvalidURLError unless ::Gitlab::UrlSanitizer.valid?(url)
+ @uri = URI(url)
+ @size = size
+ @tell = 0
+ end
+ def close
+ # no-op
+ end
+ def binmode
+ # no-op
+ end
+ def binmode?
+ true
+ end
+ def path
+ nil
+ end
+ def url
+ @uri.to_s
+ end
+ def seek(pos, where = IO::SEEK_SET)
+ new_pos =
+ case where
+ when IO::SEEK_END
+ size + pos
+ when IO::SEEK_SET
+ pos
+ when IO::SEEK_CUR
+ tell + pos
+ else
+ -1
+ end
+ raise 'new position is outside of file' if new_pos < 0 || new_pos > size
+ @tell = new_pos
+ end
+ def eof?
+ tell == size
+ end
+ def each_line
+ until eof?
+ line = readline
+ break if line.nil?
+ yield(line)
+ end
+ end
+ def read(length = nil, outbuf = "")
+ out = ""
+ length ||= size - tell
+ until length <= 0 || eof?
+ data = get_chunk
+ break if data.empty?
+ chunk_bytes = [BUFFER_SIZE - chunk_offset, length].min
+ chunk_data = data.byteslice(0, chunk_bytes)
+ out << chunk_data
+ @tell += chunk_data.bytesize
+ length -= chunk_data.bytesize
+ end
+ # If outbuf is passed, we put the output into the buffer. This supports IO.copy_stream functionality
+ if outbuf
+ outbuf.slice!(0, outbuf.bytesize)
+ outbuf << out
+ end
+ out
+ end
+ def readline
+ out = ""
+ until eof?
+ data = get_chunk
+ new_line = data.index("\n")
+ if !new_line.nil?
+ out << data[0..new_line]
+ @tell += new_line + 1
+ break
+ else
+ out << data
+ @tell += data.bytesize
+ end
+ end
+ out
+ end
+ def write(data)
+ raise NotImplementedError
+ end
+ def truncate(offset)
+ raise NotImplementedError
+ end
+ def flush
+ raise NotImplementedError
+ end
+ def present?
+ true
+ end
+ private
+ ##
+ # The below methods are not implemented in IO class
+ #
+ def in_range?
+ @chunk_range&.include?(tell)
+ end
+ def get_chunk
+ unless in_range?
+ response = Net::HTTP.start(uri.hostname, uri.port, proxy_from_env: true, use_ssl: uri.scheme == 'https') do |http|
+ http.request(request)
+ end
+ raise FailedToGetChunkError unless response.code == '200' || response.code == '206'
+ @chunk = response.body.force_encoding(Encoding::BINARY)
+ @chunk_range = response.content_range
+ ##
+ # Note: If provider does not return content_range, then we set it as we requested
+ # Provider: minio
+ # - When the file size is larger than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206
+ # - When the file size is smaller than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206
+ # Provider: AWS
+ # - When the file size is larger than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206
+ # - When the file size is smaller than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206
+ # Provider: GCS
+ # - When the file size is larger than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206
+ # - When the file size is smaller than requested Content-range, the Content-range is included in responces with Net::HTTPOK 200
+ @chunk_range ||= (chunk_start...(chunk_start + @chunk.bytesize))
+ end
+ @chunk[chunk_offset..BUFFER_SIZE]
+ end
+ def request
+ Net::HTTP::Get.new(uri).tap do |request|
+ request.set_range(chunk_start, BUFFER_SIZE)
+ end
+ end
+ def chunk_offset
+ tell % BUFFER_SIZE
+ end
+ def chunk_start
+ (tell / BUFFER_SIZE) * BUFFER_SIZE
+ end
+ def chunk_end
+ [chunk_start + BUFFER_SIZE, size].min
+ end
+ end