1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
|
# frozen_string_literal: true
# Downloads a remote file. If no filename is given, it'll use the remote filename
module BulkImports
class FileDownloadService
ServiceError = Class.new(StandardError)
REMOTE_FILENAME_PATTERN = %r{filename="(?<filename>[^"]+)"}.freeze
FILENAME_SIZE_LIMIT = 255 # chars before the extension
def initialize(configuration:, relative_url:, dir:, file_size_limit:, allowed_content_types:, filename: nil)
@configuration = configuration
@relative_url = relative_url
@filename = filename
@dir = dir
@file_size_limit = file_size_limit
@allowed_content_types = allowed_content_types
end
def execute
validate_dir
validate_url
validate_content_type
validate_content_length
download_file
validate_symlink
filepath
end
private
attr_reader :configuration, :relative_url, :dir, :file_size_limit, :allowed_content_types
def download_file
File.open(filepath, 'wb') do |file|
bytes_downloaded = 0
http_client.stream(relative_url) do |chunk|
bytes_downloaded += chunk.size
validate_size!(bytes_downloaded)
raise(ServiceError, "File download error #{chunk.code}") unless chunk.code == 200
file.write(chunk)
end
end
rescue StandardError => e
File.delete(filepath) if File.exist?(filepath)
raise e
end
def http_client
@http_client ||= BulkImports::Clients::HTTP.new(
url: configuration.url,
token: configuration.access_token
)
end
def allow_local_requests?
::Gitlab::CurrentSettings.allow_local_requests_from_web_hooks_and_services?
end
def headers
@headers ||= http_client.head(relative_url).headers
end
def validate_dir
raise(ServiceError, 'Invalid target directory') unless dir.start_with?(Dir.tmpdir)
end
def validate_symlink
if File.lstat(filepath).symlink?
File.delete(filepath)
raise(ServiceError, 'Invalid downloaded file')
end
end
def validate_url
::Gitlab::UrlBlocker.validate!(
http_client.resource_url(relative_url),
allow_localhost: allow_local_requests?,
allow_local_network: allow_local_requests?,
schemes: %w(http https)
)
end
def validate_content_length
validate_size!(headers['content-length'])
end
def validate_size!(size)
if size.blank?
raise ServiceError, 'Missing content-length header'
elsif size.to_i > file_size_limit
raise ServiceError, "File size %{size} exceeds limit of %{limit}" % {
size: ActiveSupport::NumberHelper.number_to_human_size(size),
limit: ActiveSupport::NumberHelper.number_to_human_size(file_size_limit)
}
end
end
def validate_content_type
content_type = headers['content-type']
raise(ServiceError, 'Invalid content type') if content_type.blank? || allowed_content_types.exclude?(content_type)
end
def filepath
@filepath ||= File.join(@dir, filename)
end
def filename
@filename.presence || remote_filename
end
# Fetch the remote filename information from the request content-disposition header
# - Raises if the filename does not exist
# - If the filename is longer then 255 chars truncate it
# to be a total of 255 chars (with the extension)
def remote_filename
@remote_filename ||=
headers['content-disposition'].to_s
.match(REMOTE_FILENAME_PATTERN) # matches the filename pattern
.then { |match| match&.named_captures || {} } # ensures the match is a hash
.fetch('filename') # fetches the 'filename' key or raise KeyError
.then(&File.method(:basename)) # Ensures to remove path from the filename (../ for instance)
.then(&method(:ensure_filename_size)) # Ensures the filename is within the FILENAME_SIZE_LIMIT
rescue KeyError
raise ServiceError, 'Remote filename not provided in content-disposition header'
end
def ensure_filename_size(filename)
if filename.length <= FILENAME_SIZE_LIMIT
filename
else
extname = File.extname(filename)
basename = File.basename(filename, extname)[0, FILENAME_SIZE_LIMIT]
"#{basename}#{extname}"
end
end
end
end
|