summaryrefslogtreecommitdiff
path: root/lib/gitlab/http_io.rb
blob: 6a9fb85b054a6af39d5ef18418411474786f2936 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
# frozen_string_literal: true

##
# This class is compatible with IO class (https://ruby-doc.org/core-2.3.1/IO.html)
# source: https://gitlab.com/snippets/1685610
module Gitlab
  ##
  # Read-only, IO-like view over a remote file served via HTTP(S).
  #
  # Content is fetched lazily in BUFFER_SIZE-aligned chunks using HTTP Range
  # requests. The most recently fetched chunk is kept in memory, so
  # consecutive reads that stay inside it do not trigger another request.
  class HttpIO
    # Chunk size in bytes (128 KiB); same value as `128.kilobytes`.
    BUFFER_SIZE = 128 * 1024

    InvalidURLError = Class.new(StandardError)
    FailedToGetChunkError = Class.new(StandardError)

    attr_reader :uri, :size
    attr_reader :tell
    attr_reader :chunk, :chunk_range

    alias_method :pos, :tell

    ##
    # url  - location of the remote file; must pass
    #        ::Gitlab::UrlSanitizer.valid?, otherwise InvalidURLError is raised.
    # size - total size of the remote file in bytes, as known by the caller.
    #        It is used for EOF detection and for bounds checks in #seek.
    def initialize(url, size)
      raise InvalidURLError unless ::Gitlab::UrlSanitizer.valid?(url)

      @uri = URI(url)
      @size = size
      @tell = 0
    end

    def close
      # no-op
    end

    def binmode
      # no-op
    end

    # Chunks are always forced to binary encoding, see #get_chunk.
    def binmode?
      true
    end

    # There is no local file backing this IO.
    def path
      nil
    end

    def url
      @uri.to_s
    end

    ##
    # Moves the read position.
    #
    # pos   - offset in bytes, interpreted according to `where`.
    # where - IO::SEEK_SET (default), IO::SEEK_CUR or IO::SEEK_END.
    #
    # Returns the new absolute position. Raises RuntimeError when the
    # resulting position is outside 0..size or when `where` is unknown.
    def seek(pos, where = IO::SEEK_SET)
      new_pos =
        case where
        when IO::SEEK_END then size + pos
        when IO::SEEK_SET then pos
        when IO::SEEK_CUR then tell + pos
        else -1 # unknown whence: make the range check below fail
        end

      raise 'new position is outside of file' if new_pos < 0 || new_pos > size

      @tell = new_pos
    end

    def eof?
      tell == size
    end

    ##
    # Yields every remaining line (including its trailing "\n", if any).
    def each_line
      until eof?
        line = readline
        # #readline always returns a String; an empty one means the remote
        # delivered no more data even though `size` has not been reached.
        # Stop instead of yielding "" forever.
        break if line.empty?

        yield(line)
      end
    end

    ##
    # Reads up to `length` bytes from the current position (everything up to
    # `size` when `length` is nil) and advances the position accordingly.
    #
    # outbuf - optional String whose content is replaced with the result.
    #
    # Always returns a String; it is empty at EOF or when no data is available.
    def read(length = nil, outbuf = nil)
      out = []

      length ||= size - tell

      until length <= 0 || eof?
        data = get_chunk
        break if data.empty?

        # Never read past the end of the current chunk in a single step.
        chunk_bytes = [BUFFER_SIZE - chunk_offset, length].min
        data_slice = data.byteslice(0, chunk_bytes)

        out << data_slice
        @tell += data_slice.bytesize
        length -= data_slice.bytesize
      end

      out = out.join

      # If outbuf is passed, we put the output into the buffer. This supports IO.copy_stream functionality
      outbuf.replace(out) if outbuf

      out
    end

    ##
    # Reads from the current position up to and including the next "\n", or
    # up to the end of the available data when no newline follows.
    #
    # Always returns a String; it is empty at EOF or when no data is available.
    def readline
      out = []

      until eof?
        data = get_chunk
        # Same guard as in #read: without it an empty chunk (remote body
        # shorter than `size`) would never advance the position and this
        # loop would spin forever, re-requesting the chunk each time.
        break if data.empty?

        new_line = data.index("\n")

        if new_line
          out << data[0..new_line]
          @tell += new_line + 1
          break
        end

        # No newline in the rest of this chunk: take it all and continue
        # with the next chunk.
        out << data
        @tell += data.bytesize
      end

      out.join
    end

    def write(data)
      raise NotImplementedError
    end

    def truncate(offset)
      raise NotImplementedError
    end

    def flush
      raise NotImplementedError
    end

    def present?
      true
    end

    private

    ##
    # The below methods are not implemented in IO class
    #

    # Truthy when the current position lies inside the cached chunk.
    def in_range?
      @chunk_range&.include?(tell)
    end

    ##
    # Returns the bytes of the current chunk from the current position
    # onwards, fetching the chunk over HTTP first when it is not cached.
    #
    # Always returns a String (possibly empty). Raises FailedToGetChunkError
    # when the response status is neither 200 nor 206.
    def get_chunk
      unless in_range?
        response = Net::HTTP.start(uri.hostname, uri.port, proxy_from_env: true, use_ssl: uri.scheme == 'https') do |http|
          http.request(request)
        end

        raise FailedToGetChunkError unless %w[200 206].include?(response.code)

        @chunk = response.body.force_encoding(Encoding::BINARY)
        @chunk_range = response.content_range

        ##
        # Note: If provider does not return content_range, then we set it as we requested
        # Provider: minio
        # - When the file size is larger than requested Content-range, the Content-range is included in responses with Net::HTTPPartialContent 206
        # - When the file size is smaller than requested Content-range, the Content-range is included in responses with Net::HTTPPartialContent 206
        # Provider: AWS
        # - When the file size is larger than requested Content-range, the Content-range is included in responses with Net::HTTPPartialContent 206
        # - When the file size is smaller than requested Content-range, the Content-range is included in responses with Net::HTTPPartialContent 206
        # Provider: GCS
        # - When the file size is larger than requested Content-range, the Content-range is included in responses with Net::HTTPPartialContent 206
        # - When the file size is smaller than requested Content-range, the Content-range is included in responses with Net::HTTPOK 200
        @chunk_range ||= (chunk_start...(chunk_start + @chunk.bytesize))
      end

      # String#[] returns nil when the offset lies beyond the end of the
      # fetched chunk; normalise that to an empty String so callers can rely
      # on #empty?.
      @chunk[chunk_offset..BUFFER_SIZE].to_s
    end

    # Builds a GET request for the BUFFER_SIZE-aligned chunk that contains
    # the current position.
    def request
      Net::HTTP::Get.new(uri).tap do |request|
        request.set_range(chunk_start, BUFFER_SIZE)
      end
    end

    # Offset of the current position inside its chunk.
    def chunk_offset
      tell % BUFFER_SIZE
    end

    # Absolute offset at which the chunk containing the current position starts.
    def chunk_start
      (tell / BUFFER_SIZE) * BUFFER_SIZE
    end

    # Absolute offset just past the current chunk, capped at `size`.
    # Not referenced by any other method of this class.
    def chunk_end
      [chunk_start + BUFFER_SIZE, size].min
    end
  end
end