From b960279280199f59417ad3e20ca5de11241ef1b0 Mon Sep 17 00:00:00 2001 From: Tim Burke Date: Wed, 10 Apr 2019 16:22:27 -0700 Subject: Fix SLO re-upload Previously, if you uploaded a file as an SLO then re-uploaded it with the same segment size and mtime, the second upload would go delete the segments it just (re)uploaded. This was due to us tracking old_slo_manifest_paths and new_slo_manifest_paths in different formats; one would have a leading slash while the other would not. Now, normalize to the stripped-slash version so we stop deleting segments we just uploaded. Change-Id: Ibcbed3df4febe81cdf13855656e2daaca8d521b4 (cherry picked from commit 9021a58c240e156f54ffafdc4609868f348d3ebc) (cherry picked from commit 2cd8b86075997f2997118bd92286849ddb0c93d7) --- swiftclient/service.py | 30 ++++++++++++++---------------- swiftclient/utils.py | 8 ++++++++ tests/unit/test_shell.py | 44 ++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 64 insertions(+), 18 deletions(-) diff --git a/swiftclient/service.py b/swiftclient/service.py index eedad46..7b63a07 100644 --- a/swiftclient/service.py +++ b/swiftclient/service.py @@ -17,6 +17,7 @@ import logging import os +from collections import defaultdict from concurrent.futures import as_completed, CancelledError, TimeoutError from copy import deepcopy from errno import EEXIST, ENOENT @@ -44,7 +45,7 @@ from swiftclient.command_helpers import ( from swiftclient.utils import ( config_true_value, ReadableToIterable, LengthWrapper, EMPTY_ETAG, parse_api_response, report_traceback, n_groups, split_request_headers, - n_at_a_time + n_at_a_time, normalize_manifest_path ) from swiftclient.exceptions import ClientException from swiftclient.multithreading import MultiThreadingManager @@ -2069,11 +2070,9 @@ class SwiftService(object): if not options['leave_segments']: old_manifest = headers.get('x-object-manifest') if is_slo: - for old_seg in chunk_data: - seg_path = old_seg['name'].lstrip('/') - if isinstance(seg_path, text_type): - seg_path = seg_path.encode('utf-8') - old_slo_manifest_paths.append(seg_path) + old_slo_manifest_paths.extend( + normalize_manifest_path(old_seg['name']) + for old_seg in chunk_data) except ClientException as err: if err.http_status != 404: traceback, err_time = report_traceback() @@ -2163,8 +2162,9 @@ class SwiftService(object): response = self._upload_slo_manifest( conn, segment_results, container, obj, put_headers) res['manifest_response_dict'] = response - new_slo_manifest_paths = { - seg['segment_location'] for seg in segment_results} + new_slo_manifest_paths.update( + normalize_manifest_path(new_seg['segment_location']) + for new_seg in segment_results) else: new_object_manifest = '%s/%s/%s/%s/%s/' % ( quote(seg_container.encode('utf8')), @@ -2221,8 +2221,9 @@ class SwiftService(object): response = self._upload_slo_manifest( conn, results, container, obj, put_headers) res['manifest_response_dict'] = response - new_slo_manifest_paths = { - r['segment_location'] for r in results} + new_slo_manifest_paths.update( + normalize_manifest_path(new_seg['segment_location']) + for new_seg in results) res['large_object'] = True else: res['response_dict'] = ret @@ -2262,11 +2263,10 @@ class SwiftService(object): fp.close() if old_manifest or old_slo_manifest_paths: drs = [] - delobjsmap = {} + delobjsmap = defaultdict(list) if old_manifest: scontainer, sprefix = old_manifest.split('/', 1) sprefix = sprefix.rstrip('/') + '/' - delobjsmap[scontainer] = [] for part in self.list(scontainer, {'prefix': sprefix}): if not part["success"]: raise part["error"] @@ -2278,10 +2278,8 @@ class SwiftService(object): if seg_to_delete in new_slo_manifest_paths: continue scont, sobj = \ - seg_to_delete.split(b'/', 1) - delobjs_cont = delobjsmap.get(scont, []) - delobjs_cont.append(sobj) - delobjsmap[scont] = delobjs_cont + seg_to_delete.split('/', 1) + delobjsmap[scont].append(sobj) del_segs = [] for dscont, dsobjs in delobjsmap.items(): diff --git a/swiftclient/utils.py b/swiftclient/utils.py index 87a4390..2b208b9 100644 --- a/swiftclient/utils.py +++ b/swiftclient/utils.py @@ -395,3 +395,11 @@ def n_at_a_time(seq, n): def n_groups(seq, n): items_per_group = ((len(seq) - 1) // n) + 1 return n_at_a_time(seq, items_per_group) + + +def normalize_manifest_path(path): + if six.PY2 and isinstance(path, six.text_type): + path = path.encode('utf-8') + if path.startswith('/'): + return path[1:] + return path diff --git a/tests/unit/test_shell.py b/tests/unit/test_shell.py index 91496b8..ffea060 100644 --- a/tests/unit/test_shell.py +++ b/tests/unit/test_shell.py @@ -799,11 +799,11 @@ class TestShell(unittest.TestCase): response_dict={}) expected_delete_calls = [ mock.call( - b'container1', b'old_seg1', + 'container1', 'old_seg1', response_dict={} ), mock.call( - b'container2', b'old_seg2', + 'container2', 'old_seg2', response_dict={} ) ] @@ -834,6 +834,46 @@ class TestShell(unittest.TestCase): response_dict={}) self.assertFalse(connection.return_value.delete_object.mock_calls) + @mock.patch('swiftclient.service.Connection') + def test_reupload_leaves_slo_segments(self, connection): + with open(self.tmpfile, "wb") as fh: + fh.write(b'12345678901234567890') + mtime = '{:.6f}'.format(os.path.getmtime(self.tmpfile)) + expected_segments = [ + 'container_segments/{}/slo/{}/20/10/{:08d}'.format( + self.tmpfile[1:], mtime, i) + for i in range(2) + ] + + # Test re-upload overwriting a manifest doesn't remove + # segments it just wrote + connection.return_value.head_container.return_value = { + 'x-storage-policy': 'one'} + connection.return_value.attempts = 0 + argv = ["", "upload", "container", self.tmpfile, + "--use-slo", "-S", "10"] + connection.return_value.head_object.side_effect = [ + {'x-static-large-object': 'true', # For the upload call + 'content-length': '20'}] + connection.return_value.get_object.return_value = ( + {}, + # we've already *got* the expected manifest! + json.dumps([ + {'name': seg} for seg in expected_segments + ]).encode('ascii') + ) + connection.return_value.put_object.return_value = ( + 'd41d8cd98f00b204e9800998ecf8427e') + swiftclient.shell.main(argv) + connection.return_value.put_object.assert_called_with( + 'container', + self.tmpfile[1:], # drop leading / + mock.ANY, + headers={'x-object-meta-mtime': mtime}, + query_string='multipart-manifest=put', + response_dict={}) + self.assertFalse(connection.return_value.delete_object.mock_calls) + @mock.patch('swiftclient.service.Connection') def test_upload_delete_dlo_segments(self, connection): # Upload delete existing segments -- cgit v1.2.1