1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
|
# frozen_string_literal: true
# Gitlab::Middleware::Multipart - a Rack::Multipart replacement
#
# Rack::Multipart leaves behind tempfiles in /tmp and uses valuable Ruby
# process time to copy files around. This alternative solution uses
# gitlab-workhorse to clean up the tempfiles and puts the tempfiles in a
# location where copying should not be needed.
#
# When gitlab-workhorse finds files in a multipart MIME body it sends
# a signed message via a request header. This message lists the names of
# the multipart entries that gitlab-workhorse filtered out of the
# multipart structure and saved to tempfiles. Workhorse adds new entries
# in the multipart structure with paths to the tempfiles.
#
# The job of this Rack middleware is to detect and decode the message
# from workhorse. If present, it walks the Rack 'params' hash for the
# current request, opens the respective tempfiles, and inserts the open
# Ruby File objects in the params hash where Rack::Multipart would have
# put them. The goal is that application code deeper down can keep
# working the way it did with Rack::Multipart without changes.
#
# CAVEAT: the code that modifies the params hash is a bit complex. It is
# conceivable that certain Rack params structures will not be modified
# correctly. We are not aware of such bugs at this time though.
#
module Gitlab
module Middleware
class Multipart
# Rack env key of the request header in which gitlab-workhorse sends its
# signed (JWT) message; it is removed from env before the request is passed on.
RACK_ENV_KEY = 'HTTP_GITLAB_WORKHORSE_MULTIPART_FIELDS'
# Suffix appended to a field prefix to build the name of the param that
# carries the JWT for an upload.
JWT_PARAM_SUFFIX = '.gitlab-workhorse-upload'
# Key of the decoded JWT payload under which the upload params are looked up.
JWT_PARAM_FIXED_KEY = 'upload'
# Walks the request params for every field listed in the workhorse message,
# swaps the matching entries for opened upload objects and makes sure those
# objects are closed again once the wrapped block has finished.
class Handler
  # env     - Rack environment of the current request.
  # message - decoded workhorse message; its 'rewritten_fields' entry is
  #           iterated as (field, tmp_path) pairs.
  def initialize(env, message)
    @request = Rack::Request.new(env)
    @rewritten_fields = message['rewritten_fields']
    @open_files = []
  end

  # Replaces each rewritten field in the params with an opened upload, yields,
  # and finally closes every upload that was opened (even when an error is
  # raised). Returns whatever the block returns.
  #
  # Raises RuntimeError when a field does not parse to exactly one top-level
  # key, or when a top-level field differs from its parsed key.
  def with_open_files
    @rewritten_fields.each do |field, tmp_path|
      parsed = Rack::Utils.parse_nested_query(field)
      raise "unexpected field: #{field.inspect}" if parsed.count != 1

      name, nested_path = parsed.first
      replacement =
        if nested_path.nil?
          # Top level param, e.g. field = 'foo' and not 'foo[bar]'.
          raise "invalid field: #{field.inspect}" unless field == name

          open_file(@request.params, name, tmp_path.presence).tap { |file| @open_files << file }
        else
          decorate_params_value(nested_path, @request.params[name], tmp_path.presence)
        end

      update_param(name, replacement)
    end

    yield
  ensure
    @open_files.compact.each(&:close)
  end

  # Follows +path_hash+ (a chain of single-entry hashes ending in nil) down
  # into +value_hash+ and replaces the leaf entry with an opened upload.
  # Recurses once per nesting level and returns +value_hash+, mutated in place.
  #
  # Raises RuntimeError when the path is not a single-entry Hash, when
  # +value_hash+ has no truthy entry for the path key, or when the path value
  # is neither nil nor a Hash.
  def decorate_params_value(path_hash, value_hash, path_override = nil)
    raise "invalid path: #{path_hash.inspect}" unless path_hash.is_a?(Hash) && path_hash.count == 1

    path_key, path_value = path_hash.first
    raise "invalid value hash: #{value_hash.inspect}" unless value_hash.is_a?(Hash) && value_hash[path_key]

    if path_value.nil?
      # End of the path: swap the current entry for the opened upload.
      file = open_file(value_hash[path_key], '', path_override)
      value_hash[path_key] = file
      @open_files << file
    elsif path_value.is_a?(Hash)
      decorate_params_value(path_value, value_hash[path_key], path_override)
    else
      raise "unexpected path value: #{path_value.inspect}"
    end

    value_hash
  end

  # Builds the upload object through ::UploadedFile.from_params, handing it
  # the list returned by allowed_paths and the optional path override.
  def open_file(params, key, path_override = nil)
    ::UploadedFile.from_params(params, key, allowed_paths, path_override)
  end

  # Writes +value+ under +key+ so that both Rails controllers and Rack
  # middleware see the workhorse-accelerated file in the request.
  def update_param(key, value)
    # The key has to exist in POST first, otherwise Rack's update_param would
    # add it to GET.
    post_params = @request.POST
    post_params[key] = value unless post_params[key]

    # Forces Rack::Request to properly update the env keys.
    @request.update_param(key, value)

    # ActionDispatch::Request is based on Rack::Request but caches params in
    # other env keys, so it is updated as well.
    ActionDispatch::Request.new(@request.env).update_param(key, value)
  end

  private

  # Returns the packages workhorse upload path wrapped in an Array, or an
  # empty Array when allow_packages_storage_path? rejects the packages config.
  def package_allowed_paths
    if allow_packages_storage_path?(::Gitlab.config.packages)
      [::Packages::PackageFileUploader.workhorse_upload_path]
    else
      []
    end
  end

  # True only when packages are enabled, a 'storage_path' is configured and
  # the object store is not both enabled and set to direct upload.
  def allow_packages_storage_path?(packages_config)
    return false unless packages_config.enabled && packages_config['storage_path']

    !(packages_config.object_store.enabled && packages_config.object_store.direct_upload)
  end

  # List of paths handed to ::UploadedFile when opening an upload.
  def allowed_paths
    [
      Dir.tmpdir,
      ::FileUploader.root,
      ::Gitlab.config.uploads.storage_path,
      ::JobArtifactUploader.workhorse_upload_path,
      ::LfsObjectUploader.workhorse_upload_path,
      File.join(Rails.root, 'public/uploads/tmp'),
      *package_allowed_paths
    ]
  end
end
# TODO this class is meant to replace Handler when the feature flag
# upload_middleware_jwt_params_handler is removed
# See https://gitlab.com/gitlab-org/gitlab/-/issues/233895#roll-out-steps
# Variant of Handler that does not read upload details from plain params.
# For every rewritten field it reads a JWT from the param named
# "<prefix>.gitlab-workhorse-upload", decodes it with Gitlab::Workhorse and
# opens the upload from the params stored in the token payload.
class HandlerForJWTParams < Handler
  # Replaces each rewritten field in the params with an opened upload, yields,
  # and finally closes every upload that was opened (even when an error is
  # raised). Only the keys of the rewritten fields are used here.
  #
  # Raises RuntimeError when a field does not parse to exactly one top-level
  # key, or when a top-level field differs from its parsed key.
  def with_open_files
    @rewritten_fields.each_key do |field|
      parsed_field = Rack::Utils.parse_nested_query(field)
      raise "unexpected field: #{field.inspect}" unless parsed_field.count == 1

      key, value = parsed_field.first
      if value.nil? # we have a top level param, eg. field = 'foo' and not 'foo[bar]'
        raise "invalid field: #{field.inspect}" if field != key

        value = open_file(extract_upload_params_from(@request.params, with_prefix: key))
        @open_files << value
      else
        value = decorate_params_value(value, @request.params[key])
      end

      update_param(key, value)
    end

    yield
  ensure
    @open_files.compact.each(&:close)
  end

  # Follows +hash_path+ (a chain of single-entry hashes ending in nil) down
  # into +value_hash+ and replaces the leaf entry with an opened upload.
  # This function calls itself recursively and returns +value_hash+.
  #
  # Raises RuntimeError when the path is not a single-entry Hash, when
  # +value_hash+ has no truthy entry for the path key, or when the path value
  # is neither nil nor a Hash.
  def decorate_params_value(hash_path, value_hash)
    unless hash_path.is_a?(Hash) && hash_path.count == 1
      raise "invalid path: #{hash_path.inspect}"
    end

    path_key, path_value = hash_path.first

    unless value_hash.is_a?(Hash) && value_hash[path_key]
      raise "invalid value hash: #{value_hash.inspect}"
    end

    case path_value
    when nil
      value_hash[path_key] = open_file(extract_upload_params_from(value_hash[path_key]))
      @open_files << value_hash[path_key]
      value_hash
    when Hash
      decorate_params_value(path_value, value_hash[path_key])
      value_hash
    else
      raise "unexpected path value: #{path_value.inspect}"
    end
  end

  # Builds the upload object from already extracted upload params, handing
  # ::UploadedFile the list returned by allowed_paths.
  def open_file(params)
    ::UploadedFile.from_params_without_field(params, allowed_paths)
  end

  private

  # Reads the JWT stored under "<with_prefix>.gitlab-workhorse-upload" in
  # +params+, decodes it and returns the upload params found under the
  # 'upload' key of its payload.
  #
  # Raises RuntimeError when the token is blank, when the payload is not a
  # Hash, or when the upload params are missing, nil or empty.
  def extract_upload_params_from(params, with_prefix: '')
    param_key = "#{with_prefix}#{JWT_PARAM_SUFFIX}"
    jwt_token = params[param_key]
    raise "Empty JWT param: #{param_key}" if jwt_token.blank?

    payload = Gitlab::Workhorse.decode_jwt(jwt_token).first
    raise "Invalid JWT payload: not a Hash" unless payload.is_a?(Hash)

    # fetch only falls back to {} when the key is absent; a payload holding an
    # explicit null yields nil, so use blank? rather than empty? to report it
    # with the intended error instead of a NoMethodError.
    upload_params = payload.fetch(JWT_PARAM_FIXED_KEY, {})
    raise "Empty params for: #{param_key}" if upload_params.blank?

    upload_params
  end
end
# Stores the downstream Rack application that #call delegates to.
#
# app - the next Rack app/middleware; it is invoked as app.call(env).
def initialize(app)
@app = app
end
# Rack entry point.
#
# Removes the workhorse message header from +env+. Without a message the
# request is passed straight to the wrapped app. Otherwise the message is
# decoded and the wrapped app runs inside the handler's with_open_files
# block, so the files it opened are closed afterwards.
#
# Returns the Rack response triplet of the wrapped app, or a plain-text 400
# response when UploadedFile::InvalidPathError is raised.
def call(env)
  encoded_message = env.delete(RACK_ENV_KEY)
  return @app.call(env) if encoded_message.blank?

  message = ::Gitlab::Workhorse.decode_jwt(encoded_message)[0]

  handler_class.new(env, message).with_open_files do
    @app.call(env)
  end
rescue UploadedFile::InvalidPathError => e
  # A Rack body must respond to #each and yield Strings; a bare String does
  # not, so the message is wrapped in an Array.
  [400, { 'Content-Type' => 'text/plain' }, [e.message]]
end
private
# Chooses the handler implementation: HandlerForJWTParams while the
# :upload_middleware_jwt_params_handler feature flag is enabled (queried with
# default_enabled: true), the plain Handler otherwise.
def handler_class
  jwt_params_enabled = Feature.enabled?(:upload_middleware_jwt_params_handler, default_enabled: true)
  return ::Gitlab::Middleware::Multipart::HandlerForJWTParams if jwt_params_enabled

  ::Gitlab::Middleware::Multipart::Handler
end
end
end
end
|