author    Daniel Wakefield <daniel.wakefield@hp.com>    2014-10-29 10:11:29 +0000
committer Daniel Wakefield <daniel.wakefield@hp.com>    2015-02-19 11:21:32 +0000
commit    f0300e3714e0d8cdd0ed9e4744b813e3b263d034 (patch)
tree      eb7e6c155b70d06338812c01dfe417f1fd2f0e68
parent    45cce75e505043bc3aae0a7e889cb1272d80915b (diff)
download  python-swiftclient-f0300e3714e0d8cdd0ed9e4744b813e3b263d034.tar.gz
Verify MD5 of uploaded objects.

Changed existing code to calculate the MD5 of the object during the
upload stream. Checks this MD5 against the etag returned in the
response. An exception is raised if they do not match.

Closes-Bug: 1379263
Change-Id: I6c8bc1366dfb591a26d934a30cd21c9e6b9a04ce
-rw-r--r--  swiftclient/client.py          |  40
-rw-r--r--  swiftclient/service.py         |  46
-rw-r--r--  swiftclient/utils.py           |  96
-rw-r--r--  tests/unit/test_service.py     | 271
-rw-r--r--  tests/unit/test_shell.py       |   6
-rw-r--r--  tests/unit/test_swiftclient.py | 134
-rw-r--r--  tests/unit/test_utils.py       | 106
-rw-r--r--  tests/unit/utils.py            |   2
8 files changed, 603 insertions(+), 98 deletions(-)
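As a caller-level illustration of what this change enables (a minimal sketch, not code from this commit: the storage URL, token, container and file name below are hypothetical), the result of the low-level put_object can now be cross-checked against a locally computed MD5 by wrapping the source in the new LengthWrapper:

    import os

    from swiftclient import client
    from swiftclient.utils import LengthWrapper

    # Hypothetical endpoint and credentials.
    url = 'http://swift.example.com/v1/AUTH_test'
    token = 'AUTH_tk_example'
    path = 'photo.jpg'

    length = os.path.getsize(path)
    with open(path, 'rb') as fp:
        contents = LengthWrapper(fp, length, md5=True)
        etag = client.put_object(url, token, 'container', 'photo.jpg',
                                 contents=contents, content_length=length)

    # An empty etag means the server did not report one; otherwise compare.
    if etag and etag != contents.get_md5sum():
        raise RuntimeError('MD5 mismatch: %s != %s'
                           % (etag, contents.get_md5sum()))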
diff --git a/swiftclient/client.py b/swiftclient/client.py
index d0ff52e..985cad8 100644
--- a/swiftclient/client.py
+++ b/swiftclient/client.py
@@ -36,7 +36,7 @@ import six
from swiftclient import version as swiftclient_version
from swiftclient.exceptions import ClientException
-from swiftclient.utils import LengthWrapper
+from swiftclient.utils import LengthWrapper, ReadableToIterable
AUTH_VERSIONS_V1 = ('1.0', '1', 1)
AUTH_VERSIONS_V2 = ('2.0', '2', 2)
@@ -333,8 +333,8 @@ def get_auth_keystone(auth_url, user, key, os_options, **kwargs):
except exceptions.Unauthorized:
msg = 'Unauthorized. Check username, password and tenant name/id.'
if auth_version in AUTH_VERSIONS_V3:
- msg = 'Unauthorized. Check username/id, password, ' \
- + 'tenant name/id and user/tenant domain name/id.'
+ msg = ('Unauthorized. Check username/id, password, '
+ 'tenant name/id and user/tenant domain name/id.')
raise ClientException(msg)
except exceptions.AuthorizationFailure as err:
raise ClientException('Authorization Failure. %s' % err)
@@ -388,8 +388,7 @@ def get_auth(auth_url, user, key, **kwargs):
# We are handling a special use case here where the user argument
# specifies both the user name and tenant name in the form tenant:user
if user and not kwargs.get('tenant_name') and ':' in user:
- (os_options['tenant_name'],
- user) = user.split(':')
+ os_options['tenant_name'], user = user.split(':')
# We are allowing to have an tenant_name argument in get_auth
# directly without having os_options
@@ -929,7 +928,8 @@ def put_object(url, token=None, container=None, name=None, contents=None,
container name is expected to be part of the url
:param name: object name to put; if None, the object name is expected to be
part of the url
- :param contents: a string or a file like object to read object data from;
+ :param contents: a string, a file like object or an iterable
+ to read object data from;
if None, a zero-byte put will be done
:param content_length: value to send as content-length header; also limits
the amount read from contents; if None, it will be
@@ -983,27 +983,26 @@ def put_object(url, token=None, container=None, name=None, contents=None,
headers['Content-Type'] = ''
if not contents:
headers['Content-Length'] = '0'
- if hasattr(contents, 'read'):
+
+ if isinstance(contents, (ReadableToIterable, LengthWrapper)):
+ conn.putrequest(path, headers=headers, data=contents)
+ elif hasattr(contents, 'read'):
if chunk_size is None:
chunk_size = 65536
+
if content_length is None:
- def chunk_reader():
- while True:
- data = contents.read(chunk_size)
- if not data:
- break
- yield data
- conn.putrequest(path, headers=headers, data=chunk_reader())
+ data = ReadableToIterable(contents, chunk_size, md5=False)
else:
- # Fixes https://github.com/kennethreitz/requests/issues/1648
- data = LengthWrapper(contents, content_length)
- conn.putrequest(path, headers=headers, data=data)
+ data = LengthWrapper(contents, content_length, md5=False)
+
+ conn.putrequest(path, headers=headers, data=data)
else:
if chunk_size is not None:
- warn_msg = '%s object has no \"read\" method, ignoring chunk_size'\
- % type(contents).__name__
+ warn_msg = ('%s object has no "read" method, ignoring chunk_size'
+ % type(contents).__name__)
warnings.warn(warn_msg, stacklevel=2)
conn.request('PUT', path, contents, headers)
+
resp = conn.getresponse()
body = resp.read()
headers = {'X-Auth-Token': token}
@@ -1018,7 +1017,8 @@ def put_object(url, token=None, container=None, name=None, contents=None,
http_status=resp.status, http_reason=resp.reason,
http_response_content=body)
- return resp.getheader('etag', '').strip('"')
+ etag = resp.getheader('etag', '').strip('"')
+ return etag
def post_object(url, token, container, name, headers, http_conn=None,
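With the hunk above, anything already wrapped in ReadableToIterable or LengthWrapper is handed to the connection untouched, so a caller who does not know the content length up front can pre-wrap the stream and still verify the upload afterwards. A minimal sketch under that assumption (endpoint, token and file name are hypothetical):

    from swiftclient import client
    from swiftclient.utils import ReadableToIterable

    url = 'http://swift.example.com/v1/AUTH_test'   # hypothetical
    token = 'AUTH_tk_example'                       # hypothetical

    with open('backup.tar', 'rb') as fp:
        contents = ReadableToIterable(fp, chunk_size=65536, md5=True)
        # No content_length: put_object streams the iterator chunk by chunk.
        etag = client.put_object(url, token, 'container', 'backup.tar',
                                 contents=contents)

    if etag and etag != contents.get_md5sum():
        raise RuntimeError('upload corrupted in transit')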
diff --git a/swiftclient/service.py b/swiftclient/service.py
index 2980fd8..f24d430 100644
--- a/swiftclient/service.py
+++ b/swiftclient/service.py
@@ -39,7 +39,9 @@ from swiftclient import Connection
from swiftclient.command_helpers import (
stat_account, stat_container, stat_object
)
-from swiftclient.utils import config_true_value
+from swiftclient.utils import (
+ config_true_value, ReadableToIterable, LengthWrapper
+)
from swiftclient.exceptions import ClientException
from swiftclient.multithreading import MultiThreadingManager
@@ -1465,11 +1467,18 @@ class SwiftService(object):
fp = open(path, 'rb')
fp.seek(segment_start)
+ contents = LengthWrapper(fp, segment_size, md5=True)
etag = conn.put_object(segment_container,
- segment_name, fp,
+ segment_name, contents,
content_length=segment_size,
response_dict=results_dict)
+ if etag and etag != contents.get_md5sum():
+ raise SwiftError('Segment upload failed: remote and local '
+ 'object md5 did not match, {0} != {1}\n'
+ 'remote segment has not been removed.'
+ .format(etag, contents.get_md5sum()))
+
res.update({
'success': True,
'response_dict': results_dict,
@@ -1695,21 +1704,28 @@ class SwiftService(object):
res['manifest_response_dict'] = mr
else:
res['large_object'] = False
+ obr = {}
if path is not None:
- obr = {}
- conn.put_object(
- container, obj, open(path, 'rb'),
- content_length=getsize(path), headers=put_headers,
- response_dict=obr
- )
- res['response_dict'] = obr
+ content_length = getsize(path)
+ contents = LengthWrapper(open(path, 'rb'), content_length,
+ md5=True)
else:
- obr = {}
- conn.put_object(
- container, obj, stream, headers=put_headers,
- response_dict=obr
- )
- res['response_dict'] = obr
+ content_length = None
+ contents = ReadableToIterable(stream, md5=True)
+
+ etag = conn.put_object(
+ container, obj, contents,
+ content_length=content_length, headers=put_headers,
+ response_dict=obr
+ )
+ res['response_dict'] = obr
+
+ if etag and etag != contents.get_md5sum():
+ raise SwiftError('Object upload failed: remote and local '
+ 'object md5 did not match, {0} != {1}\n'
+ 'remote object has not been removed.'
+ .format(etag, contents.get_md5sum()))
+
if old_manifest or old_slo_manifest_paths:
drs = []
if old_manifest:
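At the SwiftService layer the same comparison surfaces as a SwiftError attached to the per-object result rather than an exception at the call site. A rough sketch of how a consumer might notice it, assuming the usual upload() generator interface (container and file name are made up):

    from swiftclient.service import SwiftService

    swift = SwiftService()
    for result in swift.upload('container', ['photo.jpg']):
        if not result.get('success'):
            err = result.get('error')
            if err is not None and 'md5 did not match' in str(err):
                # The object (or segment) reached the cluster, but its etag
                # disagreed with the locally computed checksum.
                print('checksum verification failed: %s' % err)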
diff --git a/swiftclient/utils.py b/swiftclient/utils.py
index 0f442b3..f0fcc01 100644
--- a/swiftclient/utils.py
+++ b/swiftclient/utils.py
@@ -44,7 +44,7 @@ def prt_bytes(bytes, human_flag):
mods = list('KMGTPEZY')
temp = float(bytes)
if temp > 0:
- while (temp > 1023):
+ while temp > 1023:
try:
suffix = mods.pop(0)
except IndexError:
@@ -60,7 +60,7 @@ def prt_bytes(bytes, human_flag):
else:
bytes = '%12s' % bytes
- return(bytes)
+ return bytes
def generate_temp_url(path, seconds, key, method):
@@ -104,23 +104,105 @@ def generate_temp_url(path, seconds, key, method):
'{sig}&temp_url_expires={exp}'.format(
path=path,
sig=sig,
- exp=expiration)
- )
+ exp=expiration))
+
+
+class NoopMD5(object):
+ def __init__(self, *a, **kw):
+ pass
+
+ def update(self, *a, **kw):
+ pass
+
+ def hexdigest(self, *a, **kw):
+ return ''
+
+
+class ReadableToIterable(object):
+ """
+ Wrap a filelike object and act as an iterator.
+
+ It is recommended to use this class only on files opened in binary mode.
+ In Python 3, files opened in text mode yield decoded strings that the
+ md5 class cannot digest directly, so the encode() fallback is hit on
+ every call to next. This could cause problems, especially with large
+ files and small chunk sizes.
+ """
+
+ def __init__(self, content, chunk_size=65536, md5=False):
+ """
+ :param content: The filelike object that is yielded from.
+ :param chunk_size: The max size of each yielded item.
+ :param md5: Flag to enable calculating the MD5 of the content
+ as it is yielded.
+ """
+ self.md5sum = hashlib.md5() if md5 else NoopMD5()
+ self.content = content
+ self.chunk_size = chunk_size
+
+ def get_md5sum(self):
+ return self.md5sum.hexdigest()
+
+ def __next__(self):
+ """
+ Both ``__next__`` and ``next`` are provided to allow compatibility
+ with python 2 and python 3 and their use of ``iterable.next()``
+ and ``next(iterable)`` respectively.
+ """
+ chunk = self.content.read(self.chunk_size)
+ if not chunk:
+ raise StopIteration
+
+ try:
+ self.md5sum.update(chunk)
+ except TypeError:
+ self.md5sum.update(chunk.encode())
+
+ return chunk
+
+ def next(self):
+ return self.__next__()
+
+ def __iter__(self):
+ return self
class LengthWrapper(object):
+ """
+ Wrap a filelike object with a maximum length.
- def __init__(self, readable, length):
+ Fix for https://github.com/kennethreitz/requests/issues/1648
+ It is recommended to use this class only on files opened in binary mode.
+ """
+ def __init__(self, readable, length, md5=False):
+ """
+ :param readable: The filelike object to read from.
+ :param length: The maximum amount of content that can be read from
+ the filelike object before it is treated as
+ empty.
+ :param md5: Flag to enable calculating the MD5 of the content
+ as it is read.
+ """
+ self.md5sum = hashlib.md5() if md5 else NoopMD5()
self._length = self._remaining = length
self._readable = readable
def __len__(self):
return self._length
+ def get_md5sum(self):
+ return self.md5sum.hexdigest()
+
def read(self, *args, **kwargs):
if self._remaining <= 0:
return ''
- chunk = self._readable.read(
- *args, **kwargs)[:self._remaining]
+
+ chunk = self._readable.read(*args, **kwargs)[:self._remaining]
self._remaining -= len(chunk)
+
+ try:
+ self.md5sum.update(chunk)
+ except TypeError:
+ self.md5sum.update(chunk.encode())
+
return chunk
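Both wrappers can also be exercised on their own. A minimal sketch (sample.bin is a hypothetical local file of at least 42 bytes):

    from hashlib import md5

    from swiftclient.utils import LengthWrapper, ReadableToIterable

    with open('sample.bin', 'rb') as fp:
        # Iterate the file in 16-byte chunks while hashing everything read.
        stream = ReadableToIterable(fp, chunk_size=16, md5=True)
        data = b''.join(stream)
        assert stream.get_md5sum() == md5(data).hexdigest()

    with open('sample.bin', 'rb') as fp:
        # Expose only the first 42 bytes and hash what was actually read.
        capped = LengthWrapper(fp, 42, md5=True)
        data = capped.read()
        assert len(data) == 42
        assert capped.get_md5sum() == md5(data).hexdigest()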
diff --git a/tests/unit/test_service.py b/tests/unit/test_service.py
index 0a0af89..3309813 100644
--- a/tests/unit/test_service.py
+++ b/tests/unit/test_service.py
@@ -16,14 +16,16 @@ import mock
import os
import tempfile
import testtools
+import time
from hashlib import md5
from mock import Mock, PropertyMock
from six.moves.queue import Queue, Empty as QueueEmptyError
from six import BytesIO
import swiftclient
-from swiftclient.service import SwiftService, SwiftError
+import swiftclient.utils as utils
from swiftclient.client import Connection
+from swiftclient.service import SwiftService, SwiftError
clean_os_environ = {}
@@ -548,3 +550,270 @@ class TestService(testtools.TestCase):
except SwiftError as exc:
self.assertEqual('Segment size should be an integer value',
exc.value)
+
+
+class TestServiceUpload(testtools.TestCase):
+
+ def _assertDictEqual(self, a, b, m=None):
+ # assertDictEqual is not available in py2.6 so use a shallow check
+ # instead
+ if not m:
+ m = '{0} != {1}'.format(a, b)
+
+ if hasattr(self, 'assertDictEqual'):
+ self.assertDictEqual(a, b, m)
+ else:
+ self.assertTrue(isinstance(a, dict), m)
+ self.assertTrue(isinstance(b, dict), m)
+ self.assertEqual(len(a), len(b), m)
+ for k, v in a.items():
+ self.assertIn(k, b, m)
+ self.assertEqual(b[k], v, m)
+
+ def test_upload_segment_job(self):
+ with tempfile.NamedTemporaryFile() as f:
+ f.write(b'a' * 10)
+ f.write(b'b' * 10)
+ f.write(b'c' * 10)
+ f.flush()
+
+ # Mock the connection to return an empty etag. This
+ # skips etag validation which would fail as the LengthWrapper
+ # isn't read from.
+ mock_conn = mock.Mock()
+ mock_conn.put_object.return_value = ''
+ type(mock_conn).attempts = mock.PropertyMock(return_value=2)
+ expected_r = {
+ 'action': 'upload_segment',
+ 'for_object': 'test_o',
+ 'segment_index': 2,
+ 'segment_size': 10,
+ 'segment_location': '/test_c_segments/test_s_1',
+ 'log_line': 'test_o segment 2',
+ 'success': True,
+ 'response_dict': {},
+ 'segment_etag': '',
+ 'attempts': 2,
+ }
+
+ s = SwiftService()
+ r = s._upload_segment_job(conn=mock_conn,
+ path=f.name,
+ container='test_c',
+ segment_name='test_s_1',
+ segment_start=10,
+ segment_size=10,
+ segment_index=2,
+ obj_name='test_o',
+ options={'segment_container': None})
+
+ self._assertDictEqual(r, expected_r)
+
+ self.assertEqual(mock_conn.put_object.call_count, 1)
+ mock_conn.put_object.assert_called_with('test_c_segments',
+ 'test_s_1',
+ mock.ANY,
+ content_length=10,
+ response_dict={})
+ contents = mock_conn.put_object.call_args[0][2]
+ self.assertIsInstance(contents, utils.LengthWrapper)
+ self.assertEqual(len(contents), 10)
+ # This read forces the LengthWrapper to calculate the md5
+ # for the read content.
+ self.assertEqual(contents.read(), b'b' * 10)
+ self.assertEqual(contents.get_md5sum(), md5(b'b' * 10).hexdigest())
+
+ def test_upload_segment_job_etag_mismatch(self):
+ def _consuming_conn(*a, **kw):
+ contents = a[2]
+ contents.read() # Force md5 calculation
+ return 'badresponseetag'
+
+ with tempfile.NamedTemporaryFile() as f:
+ f.write(b'a' * 10)
+ f.write(b'b' * 10)
+ f.write(b'c' * 10)
+ f.flush()
+
+ mock_conn = mock.Mock()
+ mock_conn.put_object.side_effect = _consuming_conn
+ type(mock_conn).attempts = mock.PropertyMock(return_value=2)
+
+ s = SwiftService()
+ r = s._upload_segment_job(conn=mock_conn,
+ path=f.name,
+ container='test_c',
+ segment_name='test_s_1',
+ segment_start=10,
+ segment_size=10,
+ segment_index=2,
+ obj_name='test_o',
+ options={'segment_container': None})
+
+ self.assertIn('error', r)
+ self.assertTrue(r['error'].value.find('md5 did not match') >= 0)
+
+ self.assertEqual(mock_conn.put_object.call_count, 1)
+ mock_conn.put_object.assert_called_with('test_c_segments',
+ 'test_s_1',
+ mock.ANY,
+ content_length=10,
+ response_dict={})
+ contents = mock_conn.put_object.call_args[0][2]
+ self.assertEqual(contents.get_md5sum(), md5(b'b' * 10).hexdigest())
+
+ def test_upload_object_job_file(self):
+ # Uploading a file results in the file object being wrapped in a
+ # LengthWrapper. This test sets the options in such a way that much
+ # of _upload_object_job is skipped bringing the critical path down
+ # to around 60 lines to ease testing.
+ with tempfile.NamedTemporaryFile() as f:
+ f.write(b'a' * 30)
+ f.flush()
+ expected_r = {
+ 'action': 'upload_object',
+ 'attempts': 2,
+ 'container': 'test_c',
+ 'headers': {},
+ 'large_object': False,
+ 'object': 'test_o',
+ 'response_dict': {},
+ 'status': 'uploaded',
+ 'success': True,
+ }
+ expected_mtime = float(os.path.getmtime(f.name))
+
+ mock_conn = mock.Mock()
+ mock_conn.put_object.return_value = ''
+ type(mock_conn).attempts = mock.PropertyMock(return_value=2)
+
+ s = SwiftService()
+ r = s._upload_object_job(conn=mock_conn,
+ container='test_c',
+ source=f.name,
+ obj='test_o',
+ options={'changed': False,
+ 'skip_identical': False,
+ 'leave_segments': True,
+ 'header': '',
+ 'segment_size': 0})
+
+ # Check for mtime and path separately as they are calculated
+ # from the temp file and will be different each time.
+ mtime = float(r['headers']['x-object-meta-mtime'])
+ self.assertAlmostEqual(mtime, expected_mtime, delta=1)
+ del r['headers']['x-object-meta-mtime']
+
+ self.assertEqual(r['path'], f.name)
+ del r['path']
+
+ self._assertDictEqual(r, expected_r)
+ self.assertEqual(mock_conn.put_object.call_count, 1)
+ mock_conn.put_object.assert_called_with('test_c', 'test_o',
+ mock.ANY,
+ content_length=30,
+ headers={},
+ response_dict={})
+ contents = mock_conn.put_object.call_args[0][2]
+ self.assertIsInstance(contents, utils.LengthWrapper)
+ self.assertEqual(len(contents), 30)
+ # This read forces the LengthWrapper to calculate the md5
+ # for the read content.
+ self.assertEqual(contents.read(), b'a' * 30)
+ self.assertEqual(contents.get_md5sum(), md5(b'a' * 30).hexdigest())
+
+ def test_upload_object_job_stream(self):
+ # Streams are wrapped as ReadableToIterable
+ with tempfile.TemporaryFile() as f:
+ f.write(b'a' * 30)
+ f.flush()
+ f.seek(0)
+ expected_r = {
+ 'action': 'upload_object',
+ 'attempts': 2,
+ 'container': 'test_c',
+ 'headers': {},
+ 'large_object': False,
+ 'object': 'test_o',
+ 'response_dict': {},
+ 'status': 'uploaded',
+ 'success': True,
+ 'path': None,
+ }
+ expected_mtime = round(time.time())
+
+ mock_conn = mock.Mock()
+ mock_conn.put_object.return_value = ''
+ type(mock_conn).attempts = mock.PropertyMock(return_value=2)
+
+ s = SwiftService()
+ r = s._upload_object_job(conn=mock_conn,
+ container='test_c',
+ source=f,
+ obj='test_o',
+ options={'changed': False,
+ 'skip_identical': False,
+ 'leave_segments': True,
+ 'header': '',
+ 'segment_size': 0})
+
+ mtime = float(r['headers']['x-object-meta-mtime'])
+ self.assertAlmostEqual(mtime, expected_mtime, delta=10)
+ del r['headers']['x-object-meta-mtime']
+
+ self._assertDictEqual(r, expected_r)
+ self.assertEqual(mock_conn.put_object.call_count, 1)
+ mock_conn.put_object.assert_called_with('test_c', 'test_o',
+ mock.ANY,
+ content_length=None,
+ headers={},
+ response_dict={})
+ contents = mock_conn.put_object.call_args[0][2]
+ self.assertIsInstance(contents, utils.ReadableToIterable)
+ self.assertEqual(contents.chunk_size, 65536)
+ # next retrieves the first chunk of the stream, up to chunk_size
+ # bytes; it also forces the md5 to be calculated.
+ self.assertEqual(next(contents), b'a' * 30)
+ self.assertEqual(contents.get_md5sum(), md5(b'a' * 30).hexdigest())
+
+ def test_upload_object_job_etag_mismatch(self):
+ # The etag check for both streams and files uses the same code
+ # so only one test should be needed.
+ def _consuming_conn(*a, **kw):
+ contents = a[2]
+ contents.read() # Force md5 calculation
+ return 'badresponseetag'
+
+ with tempfile.NamedTemporaryFile() as f:
+ f.write(b'a' * 30)
+ f.flush()
+
+ mock_conn = mock.Mock()
+ mock_conn.put_object.side_effect = _consuming_conn
+ type(mock_conn).attempts = mock.PropertyMock(return_value=2)
+
+ s = SwiftService()
+ r = s._upload_object_job(conn=mock_conn,
+ container='test_c',
+ source=f.name,
+ obj='test_o',
+ options={'changed': False,
+ 'skip_identical': False,
+ 'leave_segments': True,
+ 'header': '',
+ 'segment_size': 0})
+
+ self.assertEqual(r['success'], False)
+ self.assertIn('error', r)
+ self.assertTrue(r['error'].value.find('md5 did not match') >= 0)
+
+ self.assertEqual(mock_conn.put_object.call_count, 1)
+ expected_headers = {'x-object-meta-mtime': mock.ANY}
+ mock_conn.put_object.assert_called_with('test_c', 'test_o',
+ mock.ANY,
+ content_length=30,
+ headers=expected_headers,
+ response_dict={})
+
+ contents = mock_conn.put_object.call_args[0][2]
+ self.assertEqual(contents.get_md5sum(), md5(b'a' * 30).hexdigest())
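The tests above either return an empty etag (which skips the comparison) or a deliberately wrong one. For completeness, a hypothetical side effect for the happy path would consume the wrapped contents and echo their digest back; this only works for file uploads, since those are wrapped in a LengthWrapper that has a read() method:

    import mock

    def _matching_conn(*args, **kwargs):
        contents = args[2]
        contents.read()                  # force the md5 to be calculated
        return contents.get_md5sum()     # echo it back as the etag

    mock_conn = mock.Mock()
    mock_conn.put_object.side_effect = _matching_conn
    type(mock_conn).attempts = mock.PropertyMock(return_value=2)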
diff --git a/tests/unit/test_shell.py b/tests/unit/test_shell.py
index c5a47b8..28aea7d 100644
--- a/tests/unit/test_shell.py
+++ b/tests/unit/test_shell.py
@@ -400,6 +400,8 @@ class TestShell(unittest.TestCase):
def test_upload(self, connection, walk):
connection.return_value.head_object.return_value = {
'content-length': '0'}
+ connection.return_value.put_object.return_value = (
+ 'd41d8cd98f00b204e9800998ecf8427e')
connection.return_value.attempts = 0
argv = ["", "upload", "container", self.tmpfile,
"-H", "X-Storage-Policy:one"]
@@ -475,6 +477,8 @@ class TestShell(unittest.TestCase):
connection.return_value.get_object.return_value = ({}, json.dumps(
[{'name': 'container1/old_seg1'}, {'name': 'container2/old_seg2'}]
))
+ connection.return_value.put_object.return_value = (
+ 'd41d8cd98f00b204e9800998ecf8427e')
swiftclient.shell.main(argv)
connection.return_value.put_object.assert_called_with(
'container',
@@ -504,6 +508,8 @@ class TestShell(unittest.TestCase):
connection.return_value.head_object.return_value = {
'content-length': '0'}
connection.return_value.attempts = 0
+ connection.return_value.put_object.return_value = (
+ 'd41d8cd98f00b204e9800998ecf8427e')
argv = ["", "upload", "container", self.tmpfile, "-S", "10",
"-C", "container"]
with open(self.tmpfile, "wb") as fh:
diff --git a/tests/unit/test_swiftclient.py b/tests/unit/test_swiftclient.py
index 0360016..9ebcff5 100644
--- a/tests/unit/test_swiftclient.py
+++ b/tests/unit/test_swiftclient.py
@@ -23,9 +23,10 @@ except ImportError:
import six
import socket
-import types
import testtools
import warnings
+import tempfile
+from hashlib import md5
from six.moves.urllib.parse import urlparse
from six.moves import reload_module
@@ -92,16 +93,22 @@ class TestJsonImport(testtools.TestCase):
self.assertEqual(c.json_loads, json.loads)
-class MockHttpResponse():
- def __init__(self, status=0):
+class MockHttpResponse(object):
+ def __init__(self, status=0, headers=None, verify=False):
self.status = status
self.status_code = status
self.reason = "OK"
self.buffer = []
self.requests_params = None
-
- class Raw:
- def read():
+ self.verify = verify
+ self.md5sum = md5()
+ # zero byte hash
+ self.headers = {'etag': '"d41d8cd98f00b204e9800998ecf8427e"'}
+ if headers:
+ self.headers.update(headers)
+
+ class Raw(object):
+ def read(self):
pass
self.raw = Raw()
@@ -109,17 +116,21 @@ class MockHttpResponse():
return ""
def getheader(self, name, default):
- return ""
+ return self.headers.get(name, default)
def getheaders(self):
return {"key1": "value1", "key2": "value2"}
def fake_response(self):
- return MockHttpResponse(self.status)
+ return self
def _fake_request(self, *arg, **kwarg):
self.status = 200
self.requests_params = kwarg
+ if self.verify:
+ for chunk in kwarg['data']:
+ self.md5sum.update(chunk)
+
# This simulate previous httplib implementation that would do a
# putrequest() and then use putheader() to send header.
for k, v in kwarg['headers'].items():
@@ -665,7 +676,7 @@ class TestPutObject(MockHttpTest):
conn = c.http_connection(u'http://www.test.com/')
mock_file = six.StringIO(u'\u5929\u7a7a\u4e2d\u7684\u4e4c\u4e91')
args = (u'\u5929\u7a7a\u4e2d\u7684\u4e4c\u4e91',
- '\u5929\u7a7a\u4e2d\u7684\u4e4c\u4e91',
+ u'\u5929\u7a7a\u4e2d\u7684\u4e4c\u4e91',
u'\u5929\u7a7a\u4e2d\u7684\u4e4c\u4e91',
u'\u5929\u7a7a\u4e2d\u7684\u4e4c\u4e91',
mock_file)
@@ -732,25 +743,22 @@ class TestPutObject(MockHttpTest):
resp = MockHttpResponse(status=200)
conn[1].getresponse = resp.fake_response
conn[1]._request = resp._fake_request
- astring = 'asdf'
- astring_len = len(astring)
- mock_file = six.StringIO(astring)
+ raw_data = b'asdf' * 256
+ raw_data_len = len(raw_data)
- c.put_object(url='http://www.test.com', http_conn=conn,
- contents=mock_file, content_length=astring_len)
- self.assertTrue(isinstance(resp.requests_params['data'],
- swiftclient.utils.LengthWrapper))
- self.assertEqual(astring_len,
- len(resp.requests_params['data'].read()))
+ for kwarg in ({'headers': {'Content-Length': str(raw_data_len)}},
+ {'content_length': raw_data_len}):
+ with tempfile.TemporaryFile() as mock_file:
+ mock_file.write(raw_data)
+ mock_file.seek(0)
- mock_file = six.StringIO(astring)
- c.put_object(url='http://www.test.com', http_conn=conn,
- headers={'Content-Length': str(astring_len)},
- contents=mock_file)
- self.assertTrue(isinstance(resp.requests_params['data'],
- swiftclient.utils.LengthWrapper))
- self.assertEqual(astring_len,
- len(resp.requests_params['data'].read()))
+ c.put_object(url='http://www.test.com', http_conn=conn,
+ contents=mock_file, **kwarg)
+
+ req_data = resp.requests_params['data']
+ self.assertTrue(isinstance(req_data,
+ swiftclient.utils.LengthWrapper))
+ self.assertEqual(raw_data_len, len(req_data.read()))
def test_chunk_upload(self):
# Chunked upload happens when no content_length is passed to put_object
@@ -758,19 +766,71 @@ class TestPutObject(MockHttpTest):
resp = MockHttpResponse(status=200)
conn[1].getresponse = resp.fake_response
conn[1]._request = resp._fake_request
- raw_data = 'asdf' * 256
+ raw_data = b'asdf' * 256
chunk_size = 16
- mock_file = six.StringIO(raw_data)
- c.put_object(url='http://www.test.com', http_conn=conn,
- contents=mock_file, chunk_size=chunk_size)
- request_data = resp.requests_params['data']
- self.assertTrue(isinstance(request_data, types.GeneratorType))
- data = ''
- for chunk in request_data:
- self.assertEqual(chunk_size, len(chunk))
- data += chunk
- self.assertEqual(data, raw_data)
+ with tempfile.TemporaryFile() as mock_file:
+ mock_file.write(raw_data)
+ mock_file.seek(0)
+
+ c.put_object(url='http://www.test.com', http_conn=conn,
+ contents=mock_file, chunk_size=chunk_size)
+ req_data = resp.requests_params['data']
+ self.assertTrue(hasattr(req_data, '__iter__'))
+ data = b''
+ for chunk in req_data:
+ self.assertEqual(chunk_size, len(chunk))
+ data += chunk
+ self.assertEqual(data, raw_data)
+
+ def test_md5_mismatch(self):
+ conn = c.http_connection('http://www.test.com')
+ resp = MockHttpResponse(status=200, verify=True,
+ headers={'etag': '"badresponseetag"'})
+ conn[1].getresponse = resp.fake_response
+ conn[1]._request = resp._fake_request
+ raw_data = b'asdf' * 256
+ raw_data_md5 = md5(raw_data).hexdigest()
+ chunk_size = 16
+
+ with tempfile.TemporaryFile() as mock_file:
+ mock_file.write(raw_data)
+ mock_file.seek(0)
+
+ contents = swiftclient.utils.ReadableToIterable(mock_file,
+ md5=True)
+
+ etag = c.put_object(url='http://www.test.com',
+ http_conn=conn,
+ contents=contents,
+ chunk_size=chunk_size)
+
+ self.assertNotEquals(etag, contents.get_md5sum())
+ self.assertEquals(raw_data_md5, contents.get_md5sum())
+
+ def test_md5_match(self):
+ conn = c.http_connection('http://www.test.com')
+ raw_data = b'asdf' * 256
+ raw_data_md5 = md5(raw_data).hexdigest()
+ resp = MockHttpResponse(status=200, verify=True,
+ headers={'etag': '"' + raw_data_md5 + '"'})
+ conn[1].getresponse = resp.fake_response
+ conn[1]._request = resp._fake_request
+ chunk_size = 16
+
+ with tempfile.TemporaryFile() as mock_file:
+ mock_file.write(raw_data)
+ mock_file.seek(0)
+ contents = swiftclient.utils.ReadableToIterable(mock_file,
+ md5=True)
+
+ etag = c.put_object(url='http://www.test.com',
+ http_conn=conn,
+ contents=contents,
+ chunk_size=chunk_size)
+
+ self.assertEquals(raw_data_md5, contents.get_md5sum())
+ self.assertEquals(etag, contents.get_md5sum())
def test_params(self):
conn = c.http_connection(u'http://www.test.com/')
diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py
index f072aed..d82d2b8 100644
--- a/tests/unit/test_utils.py
+++ b/tests/unit/test_utils.py
@@ -14,10 +14,10 @@
# limitations under the License.
import testtools
-
import mock
import six
import tempfile
+from hashlib import md5
from swiftclient import utils as u
@@ -161,39 +161,111 @@ class TestTempURL(testtools.TestCase):
self.method)
+class TestReadableToIterable(testtools.TestCase):
+
+ def test_iter(self):
+ chunk_size = 4
+ write_data = tuple(x.encode() for x in ('a', 'b', 'c', 'd'))
+ actual_md5sum = md5()
+
+ with tempfile.TemporaryFile() as f:
+ for x in write_data:
+ f.write(x * chunk_size)
+ actual_md5sum.update(x * chunk_size)
+ f.seek(0)
+ data = u.ReadableToIterable(f, chunk_size, True)
+
+ for i, data_chunk in enumerate(data):
+ self.assertEquals(chunk_size, len(data_chunk))
+ self.assertEquals(data_chunk, write_data[i] * chunk_size)
+
+ self.assertEquals(actual_md5sum.hexdigest(), data.get_md5sum())
+
+ def test_md5_creation(self):
+ # Check creation with a real and noop md5 class
+ data = u.ReadableToIterable(None, None, md5=True)
+ self.assertEquals(md5().hexdigest(), data.get_md5sum())
+ self.assertTrue(isinstance(data.md5sum, type(md5())))
+
+ data = u.ReadableToIterable(None, None, md5=False)
+ self.assertEquals('', data.get_md5sum())
+ self.assertTrue(isinstance(data.md5sum, type(u.NoopMD5())))
+
+ def test_unicode(self):
+ # Check no errors are raised if unicode data is fed in.
+ unicode_data = u'abc'
+ actual_md5sum = md5(unicode_data.encode()).hexdigest()
+ chunk_size = 2
+
+ with tempfile.TemporaryFile(mode='w+') as f:
+ f.write(unicode_data)
+ f.seek(0)
+ data = u.ReadableToIterable(f, chunk_size, True)
+
+ x = next(data)
+ self.assertEquals(2, len(x))
+ self.assertEquals(unicode_data[:2], x)
+
+ x = next(data)
+ self.assertEquals(1, len(x))
+ self.assertEquals(unicode_data[2:], x)
+
+ self.assertEquals(actual_md5sum, data.get_md5sum())
+
+
class TestLengthWrapper(testtools.TestCase):
def test_stringio(self):
- contents = six.StringIO('a' * 100)
- data = u.LengthWrapper(contents, 42)
+ contents = six.StringIO(u'a' * 100)
+ data = u.LengthWrapper(contents, 42, True)
+ s = u'a' * 42
+ read_data = u''.join(iter(data.read, ''))
+
+ self.assertEqual(42, len(data))
+ self.assertEqual(42, len(read_data))
+ self.assertEqual(s, read_data)
+ self.assertEqual(md5(s.encode()).hexdigest(), data.get_md5sum())
+
+ def test_bytesio(self):
+ contents = six.BytesIO(b'a' * 100)
+ data = u.LengthWrapper(contents, 42, True)
+ s = b'a' * 42
+ read_data = b''.join(iter(data.read, ''))
+
self.assertEqual(42, len(data))
- read_data = ''.join(iter(data.read, ''))
self.assertEqual(42, len(read_data))
- self.assertEqual('a' * 42, read_data)
+ self.assertEqual(s, read_data)
+ self.assertEqual(md5(s).hexdigest(), data.get_md5sum())
def test_tempfile(self):
- with tempfile.NamedTemporaryFile(mode='w') as f:
- f.write('a' * 100)
+ with tempfile.NamedTemporaryFile(mode='wb') as f:
+ f.write(b'a' * 100)
f.flush()
- contents = open(f.name)
- data = u.LengthWrapper(contents, 42)
+ contents = open(f.name, 'rb')
+ data = u.LengthWrapper(contents, 42, True)
+ s = b'a' * 42
+ read_data = b''.join(iter(data.read, ''))
+
self.assertEqual(42, len(data))
- read_data = ''.join(iter(data.read, ''))
self.assertEqual(42, len(read_data))
- self.assertEqual('a' * 42, read_data)
+ self.assertEqual(s, read_data)
+ self.assertEqual(md5(s).hexdigest(), data.get_md5sum())
def test_segmented_file(self):
- with tempfile.NamedTemporaryFile(mode='w') as f:
+ with tempfile.NamedTemporaryFile(mode='wb') as f:
segment_length = 1024
segments = ('a', 'b', 'c', 'd')
for c in segments:
- f.write(c * segment_length)
+ f.write((c * segment_length).encode())
f.flush()
for i, c in enumerate(segments):
- contents = open(f.name)
+ contents = open(f.name, 'rb')
contents.seek(i * segment_length)
- data = u.LengthWrapper(contents, segment_length)
+ data = u.LengthWrapper(contents, segment_length, True)
+ read_data = b''.join(iter(data.read, ''))
+ s = (c * segment_length).encode()
+
self.assertEqual(segment_length, len(data))
- read_data = ''.join(iter(data.read, ''))
self.assertEqual(segment_length, len(read_data))
- self.assertEqual(c * segment_length, read_data)
+ self.assertEqual(s, read_data)
+ self.assertEqual(md5(s).hexdigest(), data.get_md5sum())
diff --git a/tests/unit/utils.py b/tests/unit/utils.py
index 2467ca6..88d6d12 100644
--- a/tests/unit/utils.py
+++ b/tests/unit/utils.py
@@ -127,7 +127,7 @@ def fake_http_connect(*code_iter, **kwargs):
'last-modified': self.timestamp,
'x-object-meta-test': 'testing',
'etag':
- self.etag or '"68b329da9893e34099c7d8ad5cb9c940"',
+ self.etag or '"d41d8cd98f00b204e9800998ecf8427e"',
'x-works': 'yes',
'x-account-container-count': 12345}
if not self.timestamp: