diff options
author | Grzegorz Bizon <grzegorz@gitlab.com> | 2018-06-05 08:42:20 +0000 |
---|---|---|
committer | Grzegorz Bizon <grzegorz@gitlab.com> | 2018-06-05 08:42:20 +0000 |
commit | e11a1001dcdc1ea5c65845fb0897b861b5c0b92d (patch) | |
tree | 27932bbdd58a1322b6b2c3fd82738c8330ecdec2 | |
parent | 8576e45f28737f1e9f6b0b6e5649745c76751b06 (diff) | |
parent | eea26a93e7d4ac0c6fefe46592c9baa0d3e6a5cd (diff) | |
download | gitlab-ce-e11a1001dcdc1ea5c65845fb0897b861b5c0b92d.tar.gz |
Merge branch 'presigned-multipart-uploads' into 'master'
Support presigned multipart uploads
See merge request gitlab-org/gitlab-ce!18855
-rw-r--r-- | app/controllers/projects/lfs_storage_controller.rb | 2 | ||||
-rw-r--r-- | app/uploaders/object_storage.rb | 21 | ||||
-rw-r--r-- | changelogs/unreleased/presigned-multipart-uploads.yml | 5 | ||||
-rw-r--r-- | config/initializers/artifacts_direct_upload_support.rb | 7 | ||||
-rw-r--r-- | config/initializers/direct_upload_support.rb | 19 | ||||
-rw-r--r-- | doc/administration/job_artifacts.md | 3 | ||||
-rw-r--r-- | lib/api/runner.rb | 2 | ||||
-rw-r--r-- | lib/object_storage/direct_upload.rb | 166 | ||||
-rw-r--r-- | spec/initializers/artifacts_direct_upload_support_spec.rb | 71 | ||||
-rw-r--r-- | spec/initializers/direct_upload_support_spec.rb | 90 | ||||
-rw-r--r-- | spec/lib/object_storage/direct_upload_spec.rb | 164 | ||||
-rw-r--r-- | spec/requests/api/runner_spec.rb | 1 | ||||
-rw-r--r-- | spec/requests/lfs_http_spec.rb | 1 | ||||
-rw-r--r-- | spec/support/helpers/stub_object_storage.rb | 12 | ||||
-rw-r--r-- | spec/uploaders/object_storage_spec.rb | 134 |
15 files changed, 575 insertions, 123 deletions
diff --git a/app/controllers/projects/lfs_storage_controller.rb b/app/controllers/projects/lfs_storage_controller.rb index 43d8867a536..45c98d60822 100644 --- a/app/controllers/projects/lfs_storage_controller.rb +++ b/app/controllers/projects/lfs_storage_controller.rb @@ -18,7 +18,7 @@ class Projects::LfsStorageController < Projects::GitHttpClientController def upload_authorize set_workhorse_internal_api_content_type - authorized = LfsObjectUploader.workhorse_authorize + authorized = LfsObjectUploader.workhorse_authorize(has_length: true) authorized.merge!(LfsOid: oid, LfsSize: size) render json: authorized diff --git a/app/uploaders/object_storage.rb b/app/uploaders/object_storage.rb index 5bdca26a584..3bb2e1ea63a 100644 --- a/app/uploaders/object_storage.rb +++ b/app/uploaders/object_storage.rb @@ -10,8 +10,6 @@ module ObjectStorage UnknownStoreError = Class.new(StandardError) ObjectStorageUnavailable = Class.new(StandardError) - DIRECT_UPLOAD_TIMEOUT = 4.hours - DIRECT_UPLOAD_EXPIRE_OFFSET = 15.minutes TMP_UPLOAD_PATH = 'tmp/uploads'.freeze module Store @@ -157,9 +155,9 @@ module ObjectStorage model_class.uploader_options.dig(mount_point, :mount_on) || mount_point end - def workhorse_authorize + def workhorse_authorize(has_length:, maximum_size: nil) { - RemoteObject: workhorse_remote_upload_options, + RemoteObject: workhorse_remote_upload_options(has_length: has_length, maximum_size: maximum_size), TempPath: workhorse_local_upload_path }.compact end @@ -168,23 +166,16 @@ module ObjectStorage File.join(self.root, TMP_UPLOAD_PATH) end - def workhorse_remote_upload_options + def workhorse_remote_upload_options(has_length:, maximum_size: nil) return unless self.object_store_enabled? return unless self.direct_upload_enabled? id = [CarrierWave.generate_cache_id, SecureRandom.hex].join('-') upload_path = File.join(TMP_UPLOAD_PATH, id) - connection = ::Fog::Storage.new(self.object_store_credentials) - expire_at = Time.now + DIRECT_UPLOAD_TIMEOUT + DIRECT_UPLOAD_EXPIRE_OFFSET - options = { 'Content-Type' => 'application/octet-stream' } + direct_upload = ObjectStorage::DirectUpload.new(self.object_store_credentials, remote_store_path, upload_path, + has_length: has_length, maximum_size: maximum_size) - { - ID: id, - Timeout: DIRECT_UPLOAD_TIMEOUT, - GetURL: connection.get_object_url(remote_store_path, upload_path, expire_at), - DeleteURL: connection.delete_object_url(remote_store_path, upload_path, expire_at), - StoreURL: connection.put_object_url(remote_store_path, upload_path, expire_at, options) - } + direct_upload.to_hash.merge(ID: id) end end diff --git a/changelogs/unreleased/presigned-multipart-uploads.yml b/changelogs/unreleased/presigned-multipart-uploads.yml new file mode 100644 index 00000000000..52fae6534fd --- /dev/null +++ b/changelogs/unreleased/presigned-multipart-uploads.yml @@ -0,0 +1,5 @@ +--- +title: Support direct_upload with S3 Multipart uploads +merge_request: +author: +type: added diff --git a/config/initializers/artifacts_direct_upload_support.rb b/config/initializers/artifacts_direct_upload_support.rb deleted file mode 100644 index d2bc35ea613..00000000000 --- a/config/initializers/artifacts_direct_upload_support.rb +++ /dev/null @@ -1,7 +0,0 @@ -artifacts_object_store = Gitlab.config.artifacts.object_store - -if artifacts_object_store.enabled && - artifacts_object_store.direct_upload && - artifacts_object_store.connection&.provider.to_s != 'Google' - raise "Only 'Google' is supported as a object storage provider when 'direct_upload' of artifacts is used" -end diff --git a/config/initializers/direct_upload_support.rb b/config/initializers/direct_upload_support.rb new file mode 100644 index 00000000000..32fc8c8bc69 --- /dev/null +++ b/config/initializers/direct_upload_support.rb @@ -0,0 +1,19 @@ +class DirectUploadsValidator + SUPPORTED_DIRECT_UPLOAD_PROVIDERS = %w(Google AWS).freeze + + ValidationError = Class.new(StandardError) + + def verify!(object_store) + return unless object_store.enabled + return unless object_store.direct_upload + return if SUPPORTED_DIRECT_UPLOAD_PROVIDERS.include?(object_store.connection&.provider.to_s) + + raise ValidationError, "Only #{SUPPORTED_DIRECT_UPLOAD_PROVIDERS.join(',')} are supported as a object storage provider when 'direct_upload' is used" + end +end + +DirectUploadsValidator.new.tap do |validator| + [Gitlab.config.artifacts, Gitlab.config.uploads, Gitlab.config.lfs].each do |uploader| + validator.verify!(uploader.object_store) + end +end diff --git a/doc/administration/job_artifacts.md b/doc/administration/job_artifacts.md index 77fe4d561a1..e59ab5a72e1 100644 --- a/doc/administration/job_artifacts.md +++ b/doc/administration/job_artifacts.md @@ -94,6 +94,7 @@ _The artifacts are stored by default in > Available in [GitLab Premium](https://about.gitlab.com/products/) and [GitLab.com Silver](https://about.gitlab.com/gitlab-com/). > Since version 10.6, available in [GitLab CE](https://about.gitlab.com/products/) +> Since version 11.0, we support direct_upload to S3. If you don't want to use the local disk where GitLab is installed to store the artifacts, you can use an object storage like AWS S3 instead. @@ -108,7 +109,7 @@ For source installations the following settings are nested under `artifacts:` an |---------|-------------|---------| | `enabled` | Enable/disable object storage | `false` | | `remote_directory` | The bucket name where Artifacts will be stored| | -| `direct_upload` | Set to true to enable direct upload of Artifacts without the need of local shared storage. Option may be removed once we decide to support only single storage for all files. Currently only `Google` provider is supported | `false` | +| `direct_upload` | Set to true to enable direct upload of Artifacts without the need of local shared storage. Option may be removed once we decide to support only single storage for all files. | `false` | | `background_upload` | Set to false to disable automatic upload. Option may be removed once upload is direct to S3 | `true` | | `proxy_download` | Set to true to enable proxying all files served. Option allows to reduce egress traffic as this allows clients to download directly from remote storage instead of proxying all data | `false` | | `connection` | Various connection options described below | | diff --git a/lib/api/runner.rb b/lib/api/runner.rb index e9886c76870..db502697a19 100644 --- a/lib/api/runner.rb +++ b/lib/api/runner.rb @@ -205,7 +205,7 @@ module API status 200 content_type Gitlab::Workhorse::INTERNAL_API_CONTENT_TYPE - JobArtifactUploader.workhorse_authorize + JobArtifactUploader.workhorse_authorize(has_length: false, maximum_size: max_artifacts_size) end desc 'Upload artifacts for job' do diff --git a/lib/object_storage/direct_upload.rb b/lib/object_storage/direct_upload.rb new file mode 100644 index 00000000000..61a69e7ffe4 --- /dev/null +++ b/lib/object_storage/direct_upload.rb @@ -0,0 +1,166 @@ +module ObjectStorage + # + # The DirectUpload c;ass generates a set of presigned URLs + # that can be used to upload data to object storage from untrusted component: Workhorse, Runner? + # + # For Google it assumes that the platform supports variable Content-Length. + # + # For AWS it initiates Multipart Upload and presignes a set of part uploads. + # Class calculates the best part size to be able to upload up to asked maximum size. + # The number of generated parts will never go above 100, + # but we will always try to reduce amount of generated parts. + # The part size is rounded-up to 5MB. + # + class DirectUpload + include Gitlab::Utils::StrongMemoize + + TIMEOUT = 4.hours + EXPIRE_OFFSET = 15.minutes + + MAXIMUM_MULTIPART_PARTS = 100 + MINIMUM_MULTIPART_SIZE = 5.megabytes + + attr_reader :credentials, :bucket_name, :object_name + attr_reader :has_length, :maximum_size + + def initialize(credentials, bucket_name, object_name, has_length:, maximum_size: nil) + unless has_length + raise ArgumentError, 'maximum_size has to be specified if length is unknown' unless maximum_size + end + + @credentials = credentials + @bucket_name = bucket_name + @object_name = object_name + @has_length = has_length + @maximum_size = maximum_size + end + + def to_hash + { + Timeout: TIMEOUT, + GetURL: get_url, + StoreURL: store_url, + DeleteURL: delete_url, + MultipartUpload: multipart_upload_hash + }.compact + end + + def multipart_upload_hash + return unless requires_multipart_upload? + + { + PartSize: rounded_multipart_part_size, + PartURLs: multipart_part_urls, + CompleteURL: multipart_complete_url, + AbortURL: multipart_abort_url + } + end + + def provider + credentials[:provider].to_s + end + + # Implements https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectGET.html + def get_url + connection.get_object_url(bucket_name, object_name, expire_at) + end + + # Implements https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectDELETE.html + def delete_url + connection.delete_object_url(bucket_name, object_name, expire_at) + end + + # Implements https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectPUT.html + def store_url + connection.put_object_url(bucket_name, object_name, expire_at, upload_options) + end + + def multipart_part_urls + Array.new(number_of_multipart_parts) do |part_index| + multipart_part_upload_url(part_index + 1) + end + end + + # Implements https://docs.aws.amazon.com/AmazonS3/latest/API/mpUploadUploadPart.html + def multipart_part_upload_url(part_number) + connection.signed_url({ + method: 'PUT', + bucket_name: bucket_name, + object_name: object_name, + query: { uploadId: upload_id, partNumber: part_number }, + headers: upload_options + }, expire_at) + end + + # Implements https://docs.aws.amazon.com/AmazonS3/latest/API/mpUploadComplete.html + def multipart_complete_url + connection.signed_url({ + method: 'POST', + bucket_name: bucket_name, + object_name: object_name, + query: { uploadId: upload_id }, + headers: { 'Content-Type' => 'application/xml' } + }, expire_at) + end + + # Implements https://docs.aws.amazon.com/AmazonS3/latest/API/mpUploadAbort.html + def multipart_abort_url + connection.signed_url({ + method: 'DELETE', + bucket_name: bucket_name, + object_name: object_name, + query: { uploadId: upload_id } + }, expire_at) + end + + private + + def rounded_multipart_part_size + # round multipart_part_size up to minimum_mulitpart_size + (multipart_part_size + MINIMUM_MULTIPART_SIZE - 1) / MINIMUM_MULTIPART_SIZE * MINIMUM_MULTIPART_SIZE + end + + def multipart_part_size + maximum_size / number_of_multipart_parts + end + + def number_of_multipart_parts + [ + # round maximum_size up to minimum_mulitpart_size + (maximum_size + MINIMUM_MULTIPART_SIZE - 1) / MINIMUM_MULTIPART_SIZE, + MAXIMUM_MULTIPART_PARTS + ].min + end + + def aws? + provider == 'AWS' + end + + def requires_multipart_upload? + aws? && !has_length + end + + def upload_id + return unless requires_multipart_upload? + + strong_memoize(:upload_id) do + new_upload = connection.initiate_multipart_upload(bucket_name, object_name) + new_upload.body["UploadId"] + end + end + + def expire_at + strong_memoize(:expire_at) do + Time.now + TIMEOUT + EXPIRE_OFFSET + end + end + + def upload_options + { 'Content-Type' => 'application/octet-stream' } + end + + def connection + @connection ||= ::Fog::Storage.new(credentials) + end + end +end diff --git a/spec/initializers/artifacts_direct_upload_support_spec.rb b/spec/initializers/artifacts_direct_upload_support_spec.rb deleted file mode 100644 index bfb71da3388..00000000000 --- a/spec/initializers/artifacts_direct_upload_support_spec.rb +++ /dev/null @@ -1,71 +0,0 @@ -require 'spec_helper' - -describe 'Artifacts direct upload support' do - subject do - load Rails.root.join('config/initializers/artifacts_direct_upload_support.rb') - end - - let(:connection) do - { provider: provider } - end - - before do - stub_artifacts_setting( - object_store: { - enabled: enabled, - direct_upload: direct_upload, - connection: connection - }) - end - - context 'when object storage is enabled' do - let(:enabled) { true } - - context 'when direct upload is enabled' do - let(:direct_upload) { true } - - context 'when provider is Google' do - let(:provider) { 'Google' } - - it 'succeeds' do - expect { subject }.not_to raise_error - end - end - - context 'when connection is empty' do - let(:connection) { nil } - - it 'raises an error' do - expect { subject }.to raise_error /object storage provider when 'direct_upload' of artifacts is used/ - end - end - - context 'when other provider is used' do - let(:provider) { 'AWS' } - - it 'raises an error' do - expect { subject }.to raise_error /object storage provider when 'direct_upload' of artifacts is used/ - end - end - end - - context 'when direct upload is disabled' do - let(:direct_upload) { false } - let(:provider) { 'AWS' } - - it 'succeeds' do - expect { subject }.not_to raise_error - end - end - end - - context 'when object storage is disabled' do - let(:enabled) { false } - let(:direct_upload) { false } - let(:provider) { 'AWS' } - - it 'succeeds' do - expect { subject }.not_to raise_error - end - end -end diff --git a/spec/initializers/direct_upload_support_spec.rb b/spec/initializers/direct_upload_support_spec.rb new file mode 100644 index 00000000000..e51d404e030 --- /dev/null +++ b/spec/initializers/direct_upload_support_spec.rb @@ -0,0 +1,90 @@ +require 'spec_helper' + +describe 'Direct upload support' do + subject do + load Rails.root.join('config/initializers/direct_upload_support.rb') + end + + where(:config_name) do + %w(lfs artifacts uploads) + end + + with_them do + let(:connection) do + { provider: provider } + end + + let(:object_store) do + { + enabled: enabled, + direct_upload: direct_upload, + connection: connection + } + end + + before do + allow(Gitlab.config).to receive_messages(to_settings(config_name => { + object_store: object_store + })) + end + + context 'when object storage is enabled' do + let(:enabled) { true } + + context 'when direct upload is enabled' do + let(:direct_upload) { true } + + context 'when provider is AWS' do + let(:provider) { 'AWS' } + + it 'succeeds' do + expect { subject }.not_to raise_error + end + end + + context 'when provider is Google' do + let(:provider) { 'Google' } + + it 'succeeds' do + expect { subject }.not_to raise_error + end + end + + context 'when connection is empty' do + let(:connection) { nil } + + it 'raises an error' do + expect { subject }.to raise_error /are supported as a object storage provider when 'direct_upload' is used/ + end + end + + context 'when other provider is used' do + let(:provider) { 'Rackspace' } + + it 'raises an error' do + expect { subject }.to raise_error /are supported as a object storage provider when 'direct_upload' is used/ + end + end + end + + context 'when direct upload is disabled' do + let(:direct_upload) { false } + let(:provider) { 'AWS' } + + it 'succeeds' do + expect { subject }.not_to raise_error + end + end + end + + context 'when object storage is disabled' do + let(:enabled) { false } + let(:direct_upload) { false } + let(:provider) { 'Rackspace' } + + it 'succeeds' do + expect { subject }.not_to raise_error + end + end + end +end diff --git a/spec/lib/object_storage/direct_upload_spec.rb b/spec/lib/object_storage/direct_upload_spec.rb new file mode 100644 index 00000000000..5187821e8f4 --- /dev/null +++ b/spec/lib/object_storage/direct_upload_spec.rb @@ -0,0 +1,164 @@ +require 'spec_helper' + +describe ObjectStorage::DirectUpload do + let(:credentials) do + { + provider: 'AWS', + aws_access_key_id: 'AWS_ACCESS_KEY_ID', + aws_secret_access_key: 'AWS_SECRET_ACCESS_KEY' + } + end + + let(:storage_url) { 'https://uploads.s3.amazonaws.com/' } + + let(:bucket_name) { 'uploads' } + let(:object_name) { 'tmp/uploads/my-file' } + let(:maximum_size) { 1.gigabyte } + + let(:direct_upload) { described_class.new(credentials, bucket_name, object_name, has_length: has_length, maximum_size: maximum_size) } + + describe '#has_length' do + context 'is known' do + let(:has_length) { true } + let(:maximum_size) { nil } + + it "maximum size is not required" do + expect { direct_upload }.not_to raise_error + end + end + + context 'is unknown' do + let(:has_length) { false } + + context 'and maximum size is specified' do + let(:maximum_size) { 1.gigabyte } + + it "does not raise an error" do + expect { direct_upload }.not_to raise_error + end + end + + context 'and maximum size is not specified' do + let(:maximum_size) { nil } + + it "raises an error" do + expect { direct_upload }.to raise_error /maximum_size has to be specified if length is unknown/ + end + end + end + end + + describe '#to_hash' do + subject { direct_upload.to_hash } + + shared_examples 'a valid upload' do + it "returns valid structure" do + expect(subject).to have_key(:Timeout) + expect(subject[:GetURL]).to start_with(storage_url) + expect(subject[:StoreURL]).to start_with(storage_url) + expect(subject[:DeleteURL]).to start_with(storage_url) + end + end + + shared_examples 'a valid upload with multipart data' do + before do + stub_object_storage_multipart_init(storage_url, "myUpload") + end + + it_behaves_like 'a valid upload' + + it "returns valid structure" do + expect(subject).to have_key(:MultipartUpload) + expect(subject[:MultipartUpload]).to have_key(:PartSize) + expect(subject[:MultipartUpload][:PartURLs]).to all(start_with(storage_url)) + expect(subject[:MultipartUpload][:PartURLs]).to all(include('uploadId=myUpload')) + expect(subject[:MultipartUpload][:CompleteURL]).to start_with(storage_url) + expect(subject[:MultipartUpload][:CompleteURL]).to include('uploadId=myUpload') + expect(subject[:MultipartUpload][:AbortURL]).to start_with(storage_url) + expect(subject[:MultipartUpload][:AbortURL]).to include('uploadId=myUpload') + end + end + + shared_examples 'a valid upload without multipart data' do + it_behaves_like 'a valid upload' + + it "returns valid structure" do + expect(subject).not_to have_key(:MultipartUpload) + end + end + + context 'when AWS is used' do + context 'when length is known' do + let(:has_length) { true } + + it_behaves_like 'a valid upload without multipart data' + end + + context 'when length is unknown' do + let(:has_length) { false } + + it_behaves_like 'a valid upload with multipart data' do + context 'when maximum upload size is 10MB' do + let(:maximum_size) { 10.megabyte } + + it 'returns only 2 parts' do + expect(subject[:MultipartUpload][:PartURLs].length).to eq(2) + end + + it 'part size is mimimum, 5MB' do + expect(subject[:MultipartUpload][:PartSize]).to eq(5.megabyte) + end + end + + context 'when maximum upload size is 12MB' do + let(:maximum_size) { 12.megabyte } + + it 'returns only 3 parts' do + expect(subject[:MultipartUpload][:PartURLs].length).to eq(3) + end + + it 'part size is rounded-up to 5MB' do + expect(subject[:MultipartUpload][:PartSize]).to eq(5.megabyte) + end + end + + context 'when maximum upload size is 49GB' do + let(:maximum_size) { 49.gigabyte } + + it 'returns maximum, 100 parts' do + expect(subject[:MultipartUpload][:PartURLs].length).to eq(100) + end + + it 'part size is rounded-up to 5MB' do + expect(subject[:MultipartUpload][:PartSize]).to eq(505.megabyte) + end + end + end + end + end + + context 'when Google is used' do + let(:credentials) do + { + provider: 'Google', + google_storage_access_key_id: 'GOOGLE_ACCESS_KEY_ID', + google_storage_secret_access_key: 'GOOGLE_SECRET_ACCESS_KEY' + } + end + + let(:storage_url) { 'https://storage.googleapis.com/uploads/' } + + context 'when length is known' do + let(:has_length) { true } + + it_behaves_like 'a valid upload without multipart data' + end + + context 'when length is unknown' do + let(:has_length) { false } + + it_behaves_like 'a valid upload without multipart data' + end + end + end +end diff --git a/spec/requests/api/runner_spec.rb b/spec/requests/api/runner_spec.rb index 319ac389083..c981a10ac38 100644 --- a/spec/requests/api/runner_spec.rb +++ b/spec/requests/api/runner_spec.rb @@ -1101,6 +1101,7 @@ describe API::Runner, :clean_gitlab_redis_shared_state do expect(json_response['RemoteObject']).to have_key('GetURL') expect(json_response['RemoteObject']).to have_key('StoreURL') expect(json_response['RemoteObject']).to have_key('DeleteURL') + expect(json_response['RemoteObject']).to have_key('MultipartUpload') end end diff --git a/spec/requests/lfs_http_spec.rb b/spec/requests/lfs_http_spec.rb index 79672fe1cc5..4d30b99262e 100644 --- a/spec/requests/lfs_http_spec.rb +++ b/spec/requests/lfs_http_spec.rb @@ -1021,6 +1021,7 @@ describe 'Git LFS API and storage' do expect(json_response['RemoteObject']).to have_key('GetURL') expect(json_response['RemoteObject']).to have_key('StoreURL') expect(json_response['RemoteObject']).to have_key('DeleteURL') + expect(json_response['RemoteObject']).not_to have_key('MultipartUpload') expect(json_response['LfsOid']).to eq(sample_oid) expect(json_response['LfsSize']).to eq(sample_size) end diff --git a/spec/support/helpers/stub_object_storage.rb b/spec/support/helpers/stub_object_storage.rb index 19d744b959a..bceaf8277ee 100644 --- a/spec/support/helpers/stub_object_storage.rb +++ b/spec/support/helpers/stub_object_storage.rb @@ -45,4 +45,16 @@ module StubObjectStorage remote_directory: 'uploads', **params) end + + def stub_object_storage_multipart_init(endpoint, upload_id = "upload_id") + stub_request(:post, %r{\A#{endpoint}tmp/uploads/[a-z0-9-]*\?uploads\z}) + .to_return status: 200, body: <<-EOS.strip_heredoc + <?xml version="1.0" encoding="UTF-8"?> + <InitiateMultipartUploadResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/"> + <Bucket>example-bucket</Bucket> + <Key>example-object</Key> + <UploadId>#{upload_id}</UploadId> + </InitiateMultipartUploadResult> + EOS + end end diff --git a/spec/uploaders/object_storage_spec.rb b/spec/uploaders/object_storage_spec.rb index 2dd0925a8e6..01166865e88 100644 --- a/spec/uploaders/object_storage_spec.rb +++ b/spec/uploaders/object_storage_spec.rb @@ -355,7 +355,10 @@ describe ObjectStorage do end describe '.workhorse_authorize' do - subject { uploader_class.workhorse_authorize } + let(:has_length) { true } + let(:maximum_size) { nil } + + subject { uploader_class.workhorse_authorize(has_length: has_length, maximum_size: maximum_size) } before do # ensure that we use regular Fog libraries @@ -371,10 +374,6 @@ describe ObjectStorage do expect(subject[:TempPath]).to start_with(uploader_class.root) expect(subject[:TempPath]).to include(described_class::TMP_UPLOAD_PATH) end - - it "does not return remote store" do - is_expected.not_to have_key('RemoteObject') - end end shared_examples 'uses remote storage' do @@ -383,7 +382,7 @@ describe ObjectStorage do expect(subject[:RemoteObject]).to have_key(:ID) expect(subject[:RemoteObject]).to include(Timeout: a_kind_of(Integer)) - expect(subject[:RemoteObject][:Timeout]).to be(ObjectStorage::DIRECT_UPLOAD_TIMEOUT) + expect(subject[:RemoteObject][:Timeout]).to be(ObjectStorage::DirectUpload::TIMEOUT) expect(subject[:RemoteObject]).to have_key(:GetURL) expect(subject[:RemoteObject]).to have_key(:DeleteURL) expect(subject[:RemoteObject]).to have_key(:StoreURL) @@ -391,9 +390,31 @@ describe ObjectStorage do expect(subject[:RemoteObject][:DeleteURL]).to include(described_class::TMP_UPLOAD_PATH) expect(subject[:RemoteObject][:StoreURL]).to include(described_class::TMP_UPLOAD_PATH) end + end - it "does not return local store" do - is_expected.not_to have_key('TempPath') + shared_examples 'uses remote storage with multipart uploads' do + it_behaves_like 'uses remote storage' do + it "returns multipart upload" do + is_expected.to have_key(:RemoteObject) + + expect(subject[:RemoteObject]).to have_key(:MultipartUpload) + expect(subject[:RemoteObject][:MultipartUpload]).to have_key(:PartSize) + expect(subject[:RemoteObject][:MultipartUpload]).to have_key(:PartURLs) + expect(subject[:RemoteObject][:MultipartUpload]).to have_key(:CompleteURL) + expect(subject[:RemoteObject][:MultipartUpload]).to have_key(:AbortURL) + expect(subject[:RemoteObject][:MultipartUpload][:PartURLs]).to all(include(described_class::TMP_UPLOAD_PATH)) + expect(subject[:RemoteObject][:MultipartUpload][:CompleteURL]).to include(described_class::TMP_UPLOAD_PATH) + expect(subject[:RemoteObject][:MultipartUpload][:AbortURL]).to include(described_class::TMP_UPLOAD_PATH) + end + end + end + + shared_examples 'uses remote storage without multipart uploads' do + it_behaves_like 'uses remote storage' do + it "does not return multipart upload" do + is_expected.to have_key(:RemoteObject) + expect(subject[:RemoteObject]).not_to have_key(:MultipartUpload) + end end end @@ -416,6 +437,8 @@ describe ObjectStorage do end context 'uses AWS' do + let(:storage_url) { "https://uploads.s3-eu-central-1.amazonaws.com/" } + before do expect(uploader_class).to receive(:object_store_credentials) do { provider: "AWS", @@ -425,18 +448,40 @@ describe ObjectStorage do end end - it_behaves_like 'uses remote storage' do - let(:storage_url) { "https://uploads.s3-eu-central-1.amazonaws.com/" } + context 'for known length' do + it_behaves_like 'uses remote storage without multipart uploads' do + it 'returns links for S3' do + expect(subject[:RemoteObject][:GetURL]).to start_with(storage_url) + expect(subject[:RemoteObject][:DeleteURL]).to start_with(storage_url) + expect(subject[:RemoteObject][:StoreURL]).to start_with(storage_url) + end + end + end + + context 'for unknown length' do + let(:has_length) { false } + let(:maximum_size) { 1.gigabyte } - it 'returns links for S3' do - expect(subject[:RemoteObject][:GetURL]).to start_with(storage_url) - expect(subject[:RemoteObject][:DeleteURL]).to start_with(storage_url) - expect(subject[:RemoteObject][:StoreURL]).to start_with(storage_url) + before do + stub_object_storage_multipart_init(storage_url) + end + + it_behaves_like 'uses remote storage with multipart uploads' do + it 'returns links for S3' do + expect(subject[:RemoteObject][:GetURL]).to start_with(storage_url) + expect(subject[:RemoteObject][:DeleteURL]).to start_with(storage_url) + expect(subject[:RemoteObject][:StoreURL]).to start_with(storage_url) + expect(subject[:RemoteObject][:MultipartUpload][:PartURLs]).to all(start_with(storage_url)) + expect(subject[:RemoteObject][:MultipartUpload][:CompleteURL]).to start_with(storage_url) + expect(subject[:RemoteObject][:MultipartUpload][:AbortURL]).to start_with(storage_url) + end end end end context 'uses Google' do + let(:storage_url) { "https://storage.googleapis.com/uploads/" } + before do expect(uploader_class).to receive(:object_store_credentials) do { provider: "Google", @@ -445,36 +490,71 @@ describe ObjectStorage do end end - it_behaves_like 'uses remote storage' do - let(:storage_url) { "https://storage.googleapis.com/uploads/" } + context 'for known length' do + it_behaves_like 'uses remote storage without multipart uploads' do + it 'returns links for Google Cloud' do + expect(subject[:RemoteObject][:GetURL]).to start_with(storage_url) + expect(subject[:RemoteObject][:DeleteURL]).to start_with(storage_url) + expect(subject[:RemoteObject][:StoreURL]).to start_with(storage_url) + end + end + end + + context 'for unknown length' do + let(:has_length) { false } + let(:maximum_size) { 1.gigabyte } - it 'returns links for Google Cloud' do - expect(subject[:RemoteObject][:GetURL]).to start_with(storage_url) - expect(subject[:RemoteObject][:DeleteURL]).to start_with(storage_url) - expect(subject[:RemoteObject][:StoreURL]).to start_with(storage_url) + it_behaves_like 'uses remote storage without multipart uploads' do + it 'returns links for Google Cloud' do + expect(subject[:RemoteObject][:GetURL]).to start_with(storage_url) + expect(subject[:RemoteObject][:DeleteURL]).to start_with(storage_url) + expect(subject[:RemoteObject][:StoreURL]).to start_with(storage_url) + end end end end context 'uses GDK/minio' do + let(:storage_url) { "http://minio:9000/uploads/" } + before do expect(uploader_class).to receive(:object_store_credentials) do { provider: "AWS", aws_access_key_id: "AWS_ACCESS_KEY_ID", aws_secret_access_key: "AWS_SECRET_ACCESS_KEY", - endpoint: 'http://127.0.0.1:9000', + endpoint: 'http://minio:9000', path_style: true, region: "gdk" } end end - it_behaves_like 'uses remote storage' do - let(:storage_url) { "http://127.0.0.1:9000/uploads/" } + context 'for known length' do + it_behaves_like 'uses remote storage without multipart uploads' do + it 'returns links for S3' do + expect(subject[:RemoteObject][:GetURL]).to start_with(storage_url) + expect(subject[:RemoteObject][:DeleteURL]).to start_with(storage_url) + expect(subject[:RemoteObject][:StoreURL]).to start_with(storage_url) + end + end + end + + context 'for unknown length' do + let(:has_length) { false } + let(:maximum_size) { 1.gigabyte } - it 'returns links for S3' do - expect(subject[:RemoteObject][:GetURL]).to start_with(storage_url) - expect(subject[:RemoteObject][:DeleteURL]).to start_with(storage_url) - expect(subject[:RemoteObject][:StoreURL]).to start_with(storage_url) + before do + stub_object_storage_multipart_init(storage_url) + end + + it_behaves_like 'uses remote storage with multipart uploads' do + it 'returns links for S3' do + expect(subject[:RemoteObject][:GetURL]).to start_with(storage_url) + expect(subject[:RemoteObject][:DeleteURL]).to start_with(storage_url) + expect(subject[:RemoteObject][:StoreURL]).to start_with(storage_url) + expect(subject[:RemoteObject][:MultipartUpload][:PartURLs]).to all(start_with(storage_url)) + expect(subject[:RemoteObject][:MultipartUpload][:CompleteURL]).to start_with(storage_url) + expect(subject[:RemoteObject][:MultipartUpload][:AbortURL]).to start_with(storage_url) + end end end end |