diff options
author | Kamil Trzciński <ayufan@ayufan.eu> | 2018-07-09 13:34:18 +0200 |
---|---|---|
committer | Kamil Trzciński <ayufan@ayufan.eu> | 2018-07-09 14:19:52 +0200 |
commit | bc00803af03147452c12e9e2c7e8f0c0cba86f73 (patch) | |
tree | 5d360cbb1422b7d063bffa4fe40bcf1e64b61db7 /lib | |
parent | 67157de1e4cc482b5321ba2f246bfd80a7893f93 (diff) | |
download | gitlab-ce-bc00803af03147452c12e9e2c7e8f0c0cba86f73.tar.gz |
Access metadata directly from Object Storage
Previously we would pull the file, now, we just stream-it as needed from Object Storage
Diffstat (limited to 'lib')
-rw-r--r-- | lib/gitlab/ci/build/artifacts/metadata.rb | 19 | ||||
-rw-r--r-- | lib/gitlab/ci/trace/http_io.rb | 197 | ||||
-rw-r--r-- | lib/gitlab/http_io.rb | 193 |
3 files changed, 208 insertions, 201 deletions
diff --git a/lib/gitlab/ci/build/artifacts/metadata.rb b/lib/gitlab/ci/build/artifacts/metadata.rb index 0bbd60d8ffe..375d8bc1ff5 100644 --- a/lib/gitlab/ci/build/artifacts/metadata.rb +++ b/lib/gitlab/ci/build/artifacts/metadata.rb @@ -7,14 +7,15 @@ module Gitlab module Artifacts class Metadata ParserError = Class.new(StandardError) + InvalidStreamError = Class.new(StandardError) VERSION_PATTERN = /^[\w\s]+(\d+\.\d+\.\d+)/ INVALID_PATH_PATTERN = %r{(^\.?\.?/)|(/\.?\.?/)} - attr_reader :file, :path, :full_version + attr_reader :stream, :path, :full_version - def initialize(file, path, **opts) - @file, @path, @opts = file, path, opts + def initialize(stream, path, **opts) + @stream, @path, @opts = stream, path, opts @full_version = read_version end @@ -103,7 +104,17 @@ module Gitlab end def gzip(&block) - Zlib::GzipReader.open(@file, &block) + raise InvalidStreamError, "Invalid stream" unless @stream + + # restart gzip reading + @stream.seek(0) + + gz = Zlib::GzipReader.new(@stream) + yield(gz) + rescue Zlib::Error => e + raise InvalidStreamError, e.message + ensure + gz&.finish end end end diff --git a/lib/gitlab/ci/trace/http_io.rb b/lib/gitlab/ci/trace/http_io.rb deleted file mode 100644 index 8788af57a67..00000000000 --- a/lib/gitlab/ci/trace/http_io.rb +++ /dev/null @@ -1,197 +0,0 @@ -## -# This class is compatible with IO class (https://ruby-doc.org/core-2.3.1/IO.html) -# source: https://gitlab.com/snippets/1685610 -module Gitlab - module Ci - class Trace - class HttpIO - BUFFER_SIZE = 128.kilobytes - - InvalidURLError = Class.new(StandardError) - FailedToGetChunkError = Class.new(StandardError) - - attr_reader :uri, :size - attr_reader :tell - attr_reader :chunk, :chunk_range - - alias_method :pos, :tell - - def initialize(url, size) - raise InvalidURLError unless ::Gitlab::UrlSanitizer.valid?(url) - - @uri = URI(url) - @size = size - @tell = 0 - end - - def close - # no-op - end - - def binmode - # no-op - end - - def binmode? - true - end - - def path - nil - end - - def url - @uri.to_s - end - - def seek(pos, where = IO::SEEK_SET) - new_pos = - case where - when IO::SEEK_END - size + pos - when IO::SEEK_SET - pos - when IO::SEEK_CUR - tell + pos - else - -1 - end - - raise 'new position is outside of file' if new_pos < 0 || new_pos > size - - @tell = new_pos - end - - def eof? - tell == size - end - - def each_line - until eof? - line = readline - break if line.nil? - - yield(line) - end - end - - def read(length = nil, outbuf = "") - out = "" - - length ||= size - tell - - until length <= 0 || eof? - data = get_chunk - break if data.empty? - - chunk_bytes = [BUFFER_SIZE - chunk_offset, length].min - chunk_data = data.byteslice(0, chunk_bytes) - - out << chunk_data - @tell += chunk_data.bytesize - length -= chunk_data.bytesize - end - - # If outbuf is passed, we put the output into the buffer. This supports IO.copy_stream functionality - if outbuf - outbuf.slice!(0, outbuf.bytesize) - outbuf << out - end - - out - end - - def readline - out = "" - - until eof? - data = get_chunk - new_line = data.index("\n") - - if !new_line.nil? - out << data[0..new_line] - @tell += new_line + 1 - break - else - out << data - @tell += data.bytesize - end - end - - out - end - - def write(data) - raise NotImplementedError - end - - def truncate(offset) - raise NotImplementedError - end - - def flush - raise NotImplementedError - end - - def present? - true - end - - private - - ## - # The below methods are not implemented in IO class - # - def in_range? - @chunk_range&.include?(tell) - end - - def get_chunk - unless in_range? - response = Net::HTTP.start(uri.hostname, uri.port, proxy_from_env: true, use_ssl: uri.scheme == 'https') do |http| - http.request(request) - end - - raise FailedToGetChunkError unless response.code == '200' || response.code == '206' - - @chunk = response.body.force_encoding(Encoding::BINARY) - @chunk_range = response.content_range - - ## - # Note: If provider does not return content_range, then we set it as we requested - # Provider: minio - # - When the file size is larger than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206 - # - When the file size is smaller than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206 - # Provider: AWS - # - When the file size is larger than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206 - # - When the file size is smaller than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206 - # Provider: GCS - # - When the file size is larger than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206 - # - When the file size is smaller than requested Content-range, the Content-range is included in responces with Net::HTTPOK 200 - @chunk_range ||= (chunk_start...(chunk_start + @chunk.bytesize)) - end - - @chunk[chunk_offset..BUFFER_SIZE] - end - - def request - Net::HTTP::Get.new(uri).tap do |request| - request.set_range(chunk_start, BUFFER_SIZE) - end - end - - def chunk_offset - tell % BUFFER_SIZE - end - - def chunk_start - (tell / BUFFER_SIZE) * BUFFER_SIZE - end - - def chunk_end - [chunk_start + BUFFER_SIZE, size].min - end - end - end - end -end diff --git a/lib/gitlab/http_io.rb b/lib/gitlab/http_io.rb new file mode 100644 index 00000000000..ce24817db54 --- /dev/null +++ b/lib/gitlab/http_io.rb @@ -0,0 +1,193 @@ +## +# This class is compatible with IO class (https://ruby-doc.org/core-2.3.1/IO.html) +# source: https://gitlab.com/snippets/1685610 +module Gitlab + class HttpIO + BUFFER_SIZE = 128.kilobytes + + InvalidURLError = Class.new(StandardError) + FailedToGetChunkError = Class.new(StandardError) + + attr_reader :uri, :size + attr_reader :tell + attr_reader :chunk, :chunk_range + + alias_method :pos, :tell + + def initialize(url, size) + raise InvalidURLError unless ::Gitlab::UrlSanitizer.valid?(url) + + @uri = URI(url) + @size = size + @tell = 0 + end + + def close + # no-op + end + + def binmode + # no-op + end + + def binmode? + true + end + + def path + nil + end + + def url + @uri.to_s + end + + def seek(pos, where = IO::SEEK_SET) + new_pos = + case where + when IO::SEEK_END + size + pos + when IO::SEEK_SET + pos + when IO::SEEK_CUR + tell + pos + else + -1 + end + + raise 'new position is outside of file' if new_pos < 0 || new_pos > size + + @tell = new_pos + end + + def eof? + tell == size + end + + def each_line + until eof? + line = readline + break if line.nil? + + yield(line) + end + end + + def read(length = nil, outbuf = "") + out = "" + + length ||= size - tell + + until length <= 0 || eof? + data = get_chunk + break if data.empty? + + chunk_bytes = [BUFFER_SIZE - chunk_offset, length].min + chunk_data = data.byteslice(0, chunk_bytes) + + out << chunk_data + @tell += chunk_data.bytesize + length -= chunk_data.bytesize + end + + # If outbuf is passed, we put the output into the buffer. This supports IO.copy_stream functionality + if outbuf + outbuf.slice!(0, outbuf.bytesize) + outbuf << out + end + + out + end + + def readline + out = "" + + until eof? + data = get_chunk + new_line = data.index("\n") + + if !new_line.nil? + out << data[0..new_line] + @tell += new_line + 1 + break + else + out << data + @tell += data.bytesize + end + end + + out + end + + def write(data) + raise NotImplementedError + end + + def truncate(offset) + raise NotImplementedError + end + + def flush + raise NotImplementedError + end + + def present? + true + end + + private + + ## + # The below methods are not implemented in IO class + # + def in_range? + @chunk_range&.include?(tell) + end + + def get_chunk + unless in_range? + response = Net::HTTP.start(uri.hostname, uri.port, proxy_from_env: true, use_ssl: uri.scheme == 'https') do |http| + http.request(request) + end + + raise FailedToGetChunkError unless response.code == '200' || response.code == '206' + + @chunk = response.body.force_encoding(Encoding::BINARY) + @chunk_range = response.content_range + + ## + # Note: If provider does not return content_range, then we set it as we requested + # Provider: minio + # - When the file size is larger than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206 + # - When the file size is smaller than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206 + # Provider: AWS + # - When the file size is larger than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206 + # - When the file size is smaller than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206 + # Provider: GCS + # - When the file size is larger than requested Content-range, the Content-range is included in responces with Net::HTTPPartialContent 206 + # - When the file size is smaller than requested Content-range, the Content-range is included in responces with Net::HTTPOK 200 + @chunk_range ||= (chunk_start...(chunk_start + @chunk.bytesize)) + end + + @chunk[chunk_offset..BUFFER_SIZE] + end + + def request + Net::HTTP::Get.new(uri).tap do |request| + request.set_range(chunk_start, BUFFER_SIZE) + end + end + + def chunk_offset + tell % BUFFER_SIZE + end + + def chunk_start + (tell / BUFFER_SIZE) * BUFFER_SIZE + end + + def chunk_end + [chunk_start + BUFFER_SIZE, size].min + end + end +end |