diff options
Diffstat (limited to 'app/services/bulk_imports/file_download_service.rb')
-rw-r--r-- | app/services/bulk_imports/file_download_service.rb | 68 |
1 files changed, 57 insertions, 11 deletions
diff --git a/app/services/bulk_imports/file_download_service.rb b/app/services/bulk_imports/file_download_service.rb index c5a1241e0a4..9a301c260a9 100644 --- a/app/services/bulk_imports/file_download_service.rb +++ b/app/services/bulk_imports/file_download_service.rb @@ -1,18 +1,20 @@ # frozen_string_literal: true +# Downloads a remote file. If no filename is given, it'll use the remote filename module BulkImports class FileDownloadService - FILE_SIZE_LIMIT = 5.gigabytes - ALLOWED_CONTENT_TYPES = %w(application/gzip application/octet-stream).freeze - ServiceError = Class.new(StandardError) - def initialize(configuration:, relative_url:, dir:, filename:) + REMOTE_FILENAME_PATTERN = %r{filename="(?<filename>[^"]+)"}.freeze + FILENAME_SIZE_LIMIT = 255 # chars before the extension + + def initialize(configuration:, relative_url:, dir:, file_size_limit:, allowed_content_types:, filename: nil) @configuration = configuration @relative_url = relative_url @filename = filename @dir = dir - @filepath = File.join(@dir, @filename) + @file_size_limit = file_size_limit + @allowed_content_types = allowed_content_types end def execute @@ -30,7 +32,7 @@ module BulkImports private - attr_reader :configuration, :relative_url, :dir, :filename, :filepath + attr_reader :configuration, :relative_url, :dir, :file_size_limit, :allowed_content_types def download_file File.open(filepath, 'wb') do |file| @@ -39,7 +41,7 @@ module BulkImports http_client.stream(relative_url) do |chunk| bytes_downloaded += chunk.size - raise(ServiceError, 'Invalid downloaded file') if bytes_downloaded > FILE_SIZE_LIMIT + validate_size!(bytes_downloaded) raise(ServiceError, "File download error #{chunk.code}") unless chunk.code == 200 file.write(chunk) @@ -53,7 +55,7 @@ module BulkImports def http_client @http_client ||= BulkImports::Clients::HTTP.new( - uri: configuration.url, + url: configuration.url, token: configuration.access_token ) end @@ -88,15 +90,59 @@ module BulkImports end def validate_content_length 
- content_size = headers['content-length'] + validate_size!(headers['content-length']) + end - raise(ServiceError, 'Invalid content length') if content_size.blank? || content_size.to_i > FILE_SIZE_LIMIT + def validate_size!(size) + if size.blank? + raise ServiceError, 'Missing content-length header' + elsif size.to_i > file_size_limit + raise ServiceError, "File size %{size} exceeds limit of %{limit}" % { + size: ActiveSupport::NumberHelper.number_to_human_size(size), + limit: ActiveSupport::NumberHelper.number_to_human_size(file_size_limit) + } + end end def validate_content_type content_type = headers['content-type'] - raise(ServiceError, 'Invalid content type') if content_type.blank? || ALLOWED_CONTENT_TYPES.exclude?(content_type) + raise(ServiceError, 'Invalid content type') if content_type.blank? || allowed_content_types.exclude?(content_type) + end + + def filepath + @filepath ||= File.join(@dir, filename) + end + + def filename + @filename.presence || remote_filename + end + + # Fetch the remote filename information from the request content-disposition header + # - Raises if the filename does not exist + # - If the filename is longer than 255 chars, truncate its basename + # to 255 chars and re-append the extension + def remote_filename + @remote_filename ||= + headers['content-disposition'].to_s + .match(REMOTE_FILENAME_PATTERN) # matches the filename pattern + .then { |match| match&.named_captures || {} } # ensures the match is a hash + .fetch('filename') # fetches the 'filename' key or raise KeyError + .then(&File.method(:basename)) # Ensures to remove path from the filename (../ for instance) + .then(&method(:ensure_filename_size)) # Ensures the filename is within the FILENAME_SIZE_LIMIT + rescue KeyError + raise ServiceError, 'Remote filename not provided in content-disposition header' + end + + def ensure_filename_size(filename) + if filename.length <= FILENAME_SIZE_LIMIT + filename + else + extname = File.extname(filename) + basename = 
File.basename(filename, extname)[0, FILENAME_SIZE_LIMIT] + + "#{basename}#{extname}" + end end end end |