diff options
author | Nobuto Murata <nobuto.murata@canonical.com> | 2021-07-08 01:36:38 +0900 |
---|---|---|
committer | Nobuto Murata <nobuto.murata@canonical.com> | 2021-07-08 21:07:35 +0900 |
commit | 32f9a1509bc94baa6acaf508c922f3b7edf5b65f (patch) | |
tree | e57c7b586e26aa494d759f5a80917e2547c5cf95 | |
parent | 98b4a0d4e76df022fa8a85a57b00b859b09e3274 (diff) | |
download | glance_store-2.6.0.tar.gz |
s3: Optimize WRITE_CHUNKSIZE to minimize an overhead (tag: 2.6.0)
When processing image data from a client, the previously small chunk
size incurred a significant overhead. Let's align it with other
chunk-size-related parameters such as DEFAULT_LARGE_OBJECT_MIN_CHUNK_SIZE
(=5MiB) and DEFAULT_LARGE_OBJECT_CHUNK_SIZE (=10MiB).
The performance difference should be tangible, especially with a
singlepart upload; for example, the upload time can be shortened as
follows:
100 MiB: 36.5s -> 4.0s
200 MiB: 2m16.8s -> 6.1s
300 MiB: 4m50.4s -> 9.1s
Closes-Bug: 1934849
Change-Id: Icecac80dd9e4e9f7ffa76bb7ca63d8d112036b70
-rw-r--r-- | glance_store/_drivers/s3.py | 2 | ||||
-rw-r--r-- | glance_store/tests/unit/test_multistore_s3.py | 64 | ||||
-rw-r--r-- | glance_store/tests/unit/test_s3_store.py | 55 |
3 files changed, 116 insertions, 5 deletions
diff --git a/glance_store/_drivers/s3.py b/glance_store/_drivers/s3.py index 1c18531..6eeba86 100644 --- a/glance_store/_drivers/s3.py +++ b/glance_store/_drivers/s3.py @@ -361,7 +361,7 @@ class Store(glance_store.driver.Store): EXAMPLE_URL = "s3://<ACCESS_KEY>:<SECRET_KEY>@<S3_URL>/<BUCKET>/<OBJ>" READ_CHUNKSIZE = 64 * units.Ki - WRITE_CHUNKSIZE = READ_CHUNKSIZE + WRITE_CHUNKSIZE = 5 * units.Mi @staticmethod def get_schemes(): diff --git a/glance_store/tests/unit/test_multistore_s3.py b/glance_store/tests/unit/test_multistore_s3.py index d242b8f..2485e4a 100644 --- a/glance_store/tests/unit/test_multistore_s3.py +++ b/glance_store/tests/unit/test_multistore_s3.py @@ -44,7 +44,7 @@ S3_CONF = { 's3_store_secret_key': 'key', 's3_store_host': 'https://s3-region1.com', 's3_store_bucket': 'glance', - 's3_store_large_object_size': 5, # over 5MB is large + 's3_store_large_object_size': 9, # over 9MB is large 's3_store_large_object_chunk_size': 6, # part size is 6MB } @@ -91,7 +91,9 @@ class TestMultiS3Store(base.MultiStoreBaseTest, s3_store_secret_key='key', s3_store_host='https://s3-region1.com', s3_store_bucket='glance', - s3_store_large_object_size=5, + s3_store_large_object_size=S3_CONF[ + 's3_store_large_object_size' + ], s3_store_large_object_chunk_size=6) self.config(group='s3_region2', @@ -99,7 +101,9 @@ class TestMultiS3Store(base.MultiStoreBaseTest, s3_store_secret_key='key', s3_store_host='http://s3-region2.com', s3_store_bucket='glance', - s3_store_large_object_size=5, + s3_store_large_object_size=S3_CONF[ + 's3_store_large_object_size' + ], s3_store_large_object_chunk_size=6) # Ensure stores + locations cleared location.SCHEME_TO_CLS_BACKEND_MAP = {} @@ -204,6 +208,7 @@ class TestMultiS3Store(base.MultiStoreBaseTest, def test_add_singlepart(self, mock_client): """Test that we can add an image via the s3 backend.""" expected_image_id = str(uuid.uuid4()) + # 5KiB is smaller than WRITE_CHUNKSIZE expected_s3_size = FIVE_KB expected_s3_contents = b"*" * 
expected_s3_size expected_checksum = md5(expected_s3_contents, @@ -252,6 +257,59 @@ class TestMultiS3Store(base.MultiStoreBaseTest, self.assertEqual(expected_multihash, multihash) @mock.patch.object(boto3.session.Session, "client") + def test_add_singlepart_bigger_than_write_chunk(self, mock_client): + """Test that we can add an image via the s3 backend.""" + expected_image_id = str(uuid.uuid4()) + # 8 MiB is bigger than WRITE_CHUNKSIZE(=5MiB), + # but smaller than s3_store_large_object_size + expected_s3_size = 8 * units.Mi + expected_s3_contents = b"*" * expected_s3_size + expected_checksum = md5(expected_s3_contents, + usedforsecurity=False).hexdigest() + expected_multihash = hashlib.sha256(expected_s3_contents).hexdigest() + expected_location = format_s3_location( + S3_CONF['s3_store_access_key'], + S3_CONF['s3_store_secret_key'], + S3_CONF['s3_store_host'], + S3_CONF['s3_store_bucket'], + expected_image_id) + image_s3 = six.BytesIO(expected_s3_contents) + + fake_s3_client = botocore.session.get_session().create_client('s3') + + with stub.Stubber(fake_s3_client) as stubber: + stubber.add_response(method='head_bucket', + service_response={}, + expected_params={ + 'Bucket': S3_CONF['s3_store_bucket'] + }) + stubber.add_client_error(method='head_object', + service_error_code='404', + service_message='', + expected_params={ + 'Bucket': S3_CONF['s3_store_bucket'], + 'Key': expected_image_id + }) + stubber.add_response(method='put_object', + service_response={}, + expected_params={ + 'Bucket': S3_CONF['s3_store_bucket'], + 'Key': expected_image_id, + 'Body': botocore.stub.ANY + }) + + mock_client.return_value = fake_s3_client + loc, size, checksum, multihash, metadata = \ + self.store.add(expected_image_id, image_s3, expected_s3_size, + self.hash_algo) + self.assertEqual("s3_region1", metadata["store"]) + + self.assertEqual(expected_location, loc) + self.assertEqual(expected_s3_size, size) + self.assertEqual(expected_checksum, checksum) + 
self.assertEqual(expected_multihash, multihash) + + @mock.patch.object(boto3.session.Session, "client") def test_add_different_backend(self, mock_client): self.store = s3.Store(self.conf, backend="s3_region2") self.store.configure() diff --git a/glance_store/tests/unit/test_s3_store.py b/glance_store/tests/unit/test_s3_store.py index 2b95bfa..8a5f046 100644 --- a/glance_store/tests/unit/test_s3_store.py +++ b/glance_store/tests/unit/test_s3_store.py @@ -43,7 +43,7 @@ S3_CONF = { 's3_store_secret_key': 'key', 's3_store_host': 'localhost', 's3_store_bucket': 'glance', - 's3_store_large_object_size': 5, # over 5MB is large + 's3_store_large_object_size': 9, # over 9MB is large 's3_store_large_object_chunk_size': 6, # part size is 6MB } @@ -157,6 +157,7 @@ class TestStore(base.StoreBaseTest, def test_add_singlepart(self, mock_client): """Test that we can add an image via the s3 backend.""" expected_image_id = str(uuid.uuid4()) + # 5KiB is smaller than WRITE_CHUNKSIZE expected_s3_size = FIVE_KB expected_s3_contents = b"*" * expected_s3_size expected_checksum = md5(expected_s3_contents, @@ -204,6 +205,58 @@ class TestStore(base.StoreBaseTest, self.assertEqual(expected_multihash, multihash) @mock.patch.object(boto3.session.Session, "client") + def test_add_singlepart_bigger_than_write_chunk(self, mock_client): + """Test that we can add a large image via the s3 backend.""" + expected_image_id = str(uuid.uuid4()) + # 8 MiB is bigger than WRITE_CHUNKSIZE(=5MiB), + # but smaller than s3_store_large_object_size + expected_s3_size = 8 * units.Mi + expected_s3_contents = b"*" * expected_s3_size + expected_checksum = md5(expected_s3_contents, + usedforsecurity=False).hexdigest() + expected_multihash = hashlib.sha256(expected_s3_contents).hexdigest() + expected_location = format_s3_location( + S3_CONF['s3_store_access_key'], + S3_CONF['s3_store_secret_key'], + S3_CONF['s3_store_host'], + S3_CONF['s3_store_bucket'], + expected_image_id) + image_s3 = 
six.BytesIO(expected_s3_contents) + + fake_s3_client = botocore.session.get_session().create_client('s3') + + with stub.Stubber(fake_s3_client) as stubber: + stubber.add_response(method='head_bucket', + service_response={}, + expected_params={ + 'Bucket': S3_CONF['s3_store_bucket'] + }) + stubber.add_client_error(method='head_object', + service_error_code='404', + service_message='', + expected_params={ + 'Bucket': S3_CONF['s3_store_bucket'], + 'Key': expected_image_id + }) + stubber.add_response(method='put_object', + service_response={}, + expected_params={ + 'Bucket': S3_CONF['s3_store_bucket'], + 'Key': expected_image_id, + 'Body': botocore.stub.ANY + }) + + mock_client.return_value = fake_s3_client + loc, size, checksum, multihash, _ = \ + self.store.add(expected_image_id, image_s3, expected_s3_size, + self.hash_algo) + + self.assertEqual(expected_location, loc) + self.assertEqual(expected_s3_size, size) + self.assertEqual(expected_checksum, checksum) + self.assertEqual(expected_multihash, multihash) + + @mock.patch.object(boto3.session.Session, "client") def test_add_with_verifier(self, mock_client): """Assert 'verifier.update' is called when verifier is provided""" expected_image_id = str(uuid.uuid4()) |