diff options
author | Jenkins <jenkins@review.openstack.org> | 2015-02-24 08:22:18 +0000 |
---|---|---|
committer | Gerrit Code Review <review@openstack.org> | 2015-02-24 08:22:18 +0000 |
commit | 19a298bdaa14b0c3586cbae003a6dc989870ded8 (patch) | |
tree | 10d778302530078e24c1bd96daeb68be64db2f4d /swiftclient | |
parent | 8aff0bda9c35ab96146f4f419803e62710ab5618 (diff) | |
parent | f0300e3714e0d8cdd0ed9e4744b813e3b263d034 (diff) | |
download | python-swiftclient-19a298bdaa14b0c3586cbae003a6dc989870ded8.tar.gz |
Merge "Verify MD5 of uploaded objects."
Diffstat (limited to 'swiftclient')
-rw-r--r-- | swiftclient/client.py | 40 | ||||
-rw-r--r-- | swiftclient/service.py | 46 | ||||
-rw-r--r-- | swiftclient/utils.py | 96 |
3 files changed, 140 insertions, 42 deletions
diff --git a/swiftclient/client.py b/swiftclient/client.py index d0ff52e..985cad8 100644 --- a/swiftclient/client.py +++ b/swiftclient/client.py @@ -36,7 +36,7 @@ import six from swiftclient import version as swiftclient_version from swiftclient.exceptions import ClientException -from swiftclient.utils import LengthWrapper +from swiftclient.utils import LengthWrapper, ReadableToIterable AUTH_VERSIONS_V1 = ('1.0', '1', 1) AUTH_VERSIONS_V2 = ('2.0', '2', 2) @@ -333,8 +333,8 @@ def get_auth_keystone(auth_url, user, key, os_options, **kwargs): except exceptions.Unauthorized: msg = 'Unauthorized. Check username, password and tenant name/id.' if auth_version in AUTH_VERSIONS_V3: - msg = 'Unauthorized. Check username/id, password, ' \ - + 'tenant name/id and user/tenant domain name/id.' + msg = ('Unauthorized. Check username/id, password, ' + 'tenant name/id and user/tenant domain name/id.') raise ClientException(msg) except exceptions.AuthorizationFailure as err: raise ClientException('Authorization Failure. %s' % err) @@ -388,8 +388,7 @@ def get_auth(auth_url, user, key, **kwargs): # We are handling a special use case here where the user argument # specifies both the user name and tenant name in the form tenant:user if user and not kwargs.get('tenant_name') and ':' in user: - (os_options['tenant_name'], - user) = user.split(':') + os_options['tenant_name'], user = user.split(':') # We are allowing to have an tenant_name argument in get_auth # directly without having os_options @@ -929,7 +928,8 @@ def put_object(url, token=None, container=None, name=None, contents=None, container name is expected to be part of the url :param name: object name to put; if None, the object name is expected to be part of the url - :param contents: a string or a file like object to read object data from; + :param contents: a string, a file like object or an iterable + to read object data from; if None, a zero-byte put will be done :param content_length: value to send as content-length header; also limits the amount read from contents; if None, it will be @@ -983,27 +983,26 @@ def put_object(url, token=None, container=None, name=None, contents=None, headers['Content-Type'] = '' if not contents: headers['Content-Length'] = '0' - if hasattr(contents, 'read'): + + if isinstance(contents, (ReadableToIterable, LengthWrapper)): + conn.putrequest(path, headers=headers, data=contents) + elif hasattr(contents, 'read'): if chunk_size is None: chunk_size = 65536 + if content_length is None: - def chunk_reader(): - while True: - data = contents.read(chunk_size) - if not data: - break - yield data - conn.putrequest(path, headers=headers, data=chunk_reader()) + data = ReadableToIterable(contents, chunk_size, md5=False) else: - # Fixes https://github.com/kennethreitz/requests/issues/1648 - data = LengthWrapper(contents, content_length) - conn.putrequest(path, headers=headers, data=data) + data = LengthWrapper(contents, content_length, md5=False) + + conn.putrequest(path, headers=headers, data=data) else: if chunk_size is not None: - warn_msg = '%s object has no \"read\" method, ignoring chunk_size'\ - % type(contents).__name__ + warn_msg = ('%s object has no "read" method, ignoring chunk_size' + % type(contents).__name__) warnings.warn(warn_msg, stacklevel=2) conn.request('PUT', path, contents, headers) + resp = conn.getresponse() body = resp.read() headers = {'X-Auth-Token': token} @@ -1018,7 +1017,8 @@ def put_object(url, token=None, container=None, name=None, contents=None, http_status=resp.status, http_reason=resp.reason, http_response_content=body) - return resp.getheader('etag', '').strip('"') + etag = resp.getheader('etag', '').strip('"') + return etag def post_object(url, token, container, name, headers, http_conn=None, diff --git a/swiftclient/service.py b/swiftclient/service.py index 2980fd8..f24d430 100644 --- a/swiftclient/service.py +++ b/swiftclient/service.py @@ -39,7 +39,9 @@ from swiftclient import Connection from swiftclient.command_helpers import ( stat_account, stat_container, stat_object ) -from swiftclient.utils import config_true_value +from swiftclient.utils import ( + config_true_value, ReadableToIterable, LengthWrapper +) from swiftclient.exceptions import ClientException from swiftclient.multithreading import MultiThreadingManager @@ -1465,11 +1467,18 @@ class SwiftService(object): fp = open(path, 'rb') fp.seek(segment_start) + contents = LengthWrapper(fp, segment_size, md5=True) etag = conn.put_object(segment_container, - segment_name, fp, + segment_name, contents, content_length=segment_size, response_dict=results_dict) + if etag and etag != contents.get_md5sum(): + raise SwiftError('Segment upload failed: remote and local ' + 'object md5 did not match, {0} != {1}\n' + 'remote segment has not been removed.' + .format(etag, contents.get_md5sum())) + res.update({ 'success': True, 'response_dict': results_dict, @@ -1695,21 +1704,28 @@ class SwiftService(object): res['manifest_response_dict'] = mr else: res['large_object'] = False + obr = {} if path is not None: - obr = {} - conn.put_object( - container, obj, open(path, 'rb'), - content_length=getsize(path), headers=put_headers, - response_dict=obr - ) - res['response_dict'] = obr + content_length = getsize(path) + contents = LengthWrapper(open(path, 'rb'), content_length, + md5=True) else: - obr = {} - conn.put_object( - container, obj, stream, headers=put_headers, - response_dict=obr - ) - res['response_dict'] = obr + content_length = None + contents = ReadableToIterable(stream, md5=True) + + etag = conn.put_object( + container, obj, contents, + content_length=content_length, headers=put_headers, + response_dict=obr + ) + res['response_dict'] = obr + + if etag and etag != contents.get_md5sum(): + raise SwiftError('Object upload failed: remote and local ' + 'object md5 did not match, {0} != {1}\n' + 'remote object has not been removed.' + .format(etag, contents.get_md5sum())) + if old_manifest or old_slo_manifest_paths: drs = [] if old_manifest: diff --git a/swiftclient/utils.py b/swiftclient/utils.py index 0f442b3..f0fcc01 100644 --- a/swiftclient/utils.py +++ b/swiftclient/utils.py @@ -44,7 +44,7 @@ def prt_bytes(bytes, human_flag): mods = list('KMGTPEZY') temp = float(bytes) if temp > 0: - while (temp > 1023): + while temp > 1023: try: suffix = mods.pop(0) except IndexError: @@ -60,7 +60,7 @@ def prt_bytes(bytes, human_flag): else: bytes = '%12s' % bytes - return(bytes) + return bytes def generate_temp_url(path, seconds, key, method): @@ -104,23 +104,105 @@ def generate_temp_url(path, seconds, key, method): '{sig}&temp_url_expires={exp}'.format( path=path, sig=sig, - exp=expiration) - ) + exp=expiration)) + + +class NoopMD5(object): + def __init__(self, *a, **kw): + pass + + def update(self, *a, **kw): + pass + + def hexdigest(self, *a, **kw): + return '' + + +class ReadableToIterable(object): + """ + Wrap a filelike object and act as an iterator. + + It is recommended to use this class only on files opened in binary mode. + Due to the Unicode changes in python 3 files are now opened using an + encoding not suitable for use with the md5 class and because of this + hit the exception on every call to next. This could cause problems, + especially with large files and small chunk sizes. + """ + + def __init__(self, content, chunk_size=65536, md5=False): + """ + :param content: The filelike object that is yielded from. + :param chunk_size: The max size of each yielded item. + :param md5: Flag to enable calculating the MD5 of the content + as it is yielded. + """ + self.md5sum = hashlib.md5() if md5 else NoopMD5() + self.content = content + self.chunk_size = chunk_size + + def get_md5sum(self): + return self.md5sum.hexdigest() + + def __next__(self): + """ + Both ``__next__`` and ``next`` are provided to allow compatibility + with python 2 and python 3 and their use of ``iterable.next()`` + and ``next(iterable)`` respectively. + """ + chunk = self.content.read(self.chunk_size) + if not chunk: + raise StopIteration + + try: + self.md5sum.update(chunk) + except TypeError: + self.md5sum.update(chunk.encode()) + + return chunk + + def next(self): + return self.__next__() + + def __iter__(self): + return self class LengthWrapper(object): + """ + Wrap a filelike object with a maximum length. - def __init__(self, readable, length): + Fix for https://github.com/kennethreitz/requests/issues/1648 + It is recommended to use this class only on files opened in binary mode. + """ + def __init__(self, readable, length, md5=False): + """ + :param readable: The filelike object to read from. + :param length: The maximum amount of content to that can be read from + the filelike object before it is simulated to be + empty. + :param md5: Flag to enable calculating the MD5 of the content + as it is read. + """ + self.md5sum = hashlib.md5() if md5 else NoopMD5() self._length = self._remaining = length self._readable = readable def __len__(self): return self._length + def get_md5sum(self): + return self.md5sum.hexdigest() + def read(self, *args, **kwargs): if self._remaining <= 0: return '' - chunk = self._readable.read( - *args, **kwargs)[:self._remaining] + + chunk = self._readable.read(*args, **kwargs)[:self._remaining] self._remaining -= len(chunk) + + try: + self.md5sum.update(chunk) + except TypeError: + self.md5sum.update(chunk.encode()) + return chunk |