proxy: Use the right ranges when going to multiple object servers

When the proxy times out talking to a backend server (say, because it was under heavy load and having trouble servicing the request), we catch the ChunkReadTimeout and try to get the rest from another server. The client by and large doesn't care; there may be a brief pause in the download while the proxy gets the new connection, but all the bytes arrive and in the right order:

    GET from node1, serve bytes 0 through N, timeout
    GET from node2, serve bytes N through end

When we calculate the range for the new request, we check to see if we already have one from the previous request -- if one exists, we adjust it based on the bytes sent to the client thus far. This works fine for single failures, but if we need to go back *again* we double up the offset and send the client incomplete, bad data:

    GET from node1, serve bytes 0 through N, timeout
    GET from node2, serve bytes N through M, timeout
    GET from node3, serve bytes N + M through end

This leaves the client missing bytes M through N + M.

We should adjust the range based on the number of bytes pulled from the *backend* rather than delivered to the *frontend*. This just requires that we reset our book-keeping after adjusting the Range header.

Change-Id: Ie153d01479c4242c01f48bf0ada78c2f9b6c8ff0
Closes-Bug: 1717401
(cherry picked from commit 6b19ca7a7d5833f5648976d8d30c776975e361db)
author: Tim Burke <tim.burke@gmail.com> 2017-09-15 22:52:26 +0000
committer: Tim Burke <tim.burke@gmail.com> 2017-09-18 19:07:41 +0000
commit: 35ab6a5da363aa10c8fa88df0a388ad6518164e7 (patch)
tree: febf368a62cdad2ce77b7c8927a567fdb77e11b9
parent: 69c715c505cf9e5df29dc1dff2fa1a4847471cb6 (diff)
download: swift-35ab6a5da363aa10c8fa88df0a388ad6518164e7.tar.gz
2 files changed, 26 insertions, 7 deletions
diff --git a/swift/proxy/controllers/base.py b/swift/proxy/controllers/base.py
index be98bc15d..d1e37cbc4 100644
--- a/swift/proxy/controllers/base.py
+++ b/swift/proxy/controllers/base.py
@@ -771,14 +771,16 @@ class ResumingGetter(object):
                            this request. This will change the Range header
                            so that the next req will start where it left off.
 
-        :raises ValueError: if invalid range header
         :raises HTTPRequestedRangeNotSatisfiable: if begin + num_bytes
                                                   > end of range + 1
         :raises RangeAlreadyComplete: if begin + num_bytes == end of range + 1
         """
-        if 'Range' in self.backend_headers:
-            req_range = Range(self.backend_headers['Range'])
+        try:
+            req_range = Range(self.backend_headers.get('Range'))
+        except ValueError:
+            req_range = None
 
+        if req_range:
             begin, end = req_range.ranges[0]
             if begin is None:
                 # this is a -50 range req (last 50 bytes of file)
@@ -802,6 +804,9 @@ class ResumingGetter(object):
         else:
             self.backend_headers['Range'] = 'bytes=%d-' % num_bytes
 
+        # Reset so if we need to do this more than once, we don't double-up
+        self.bytes_used_from_backend = 0
+
     def pop_range(self):
         """
         Remove the first byterange from our Range header.
diff --git a/test/unit/proxy/controllers/test_base.py b/test/unit/proxy/controllers/test_base.py
index b241a5cec..4a48f4bc1 100644
--- a/test/unit/proxy/controllers/test_base.py
+++ b/test/unit/proxy/controllers/test_base.py
@@ -876,18 +876,32 @@ class TestFuncs(unittest.TestCase):
 
         node = {'ip': '1.2.3.4', 'port': 6200, 'device': 'sda'}
 
-        source1 = TestSource(['abcd', '1234', 'abc', None])
-        source2 = TestSource(['efgh5678'])
+        data = ['abcd', '1234', 'efgh', '5678', 'lots', 'more', 'data']
+
+        # NB: content length on source1 should be correct
+        # but that reversed piece never makes it to the client
+        source1 = TestSource(data[:2] + [data[2][::-1], None] + data[3:])
+        source2 = TestSource(data[2:4] + ['nope', None])
+        source3 = TestSource(data[4:])
         req = Request.blank('/v1/a/c/o')
         handler = GetOrHeadHandler(
             self.app, req, 'Object', None, None, None, {},
             client_chunk_size=8)
 
+        range_headers = []
+        sources = [(source2, node), (source3, node)]
+
+        def mock_get_source_and_node():
+            range_headers.append(handler.backend_headers['Range'])
+            return sources.pop(0)
+
         app_iter = handler._make_app_iter(req, node, source1)
         with mock.patch.object(handler, '_get_source_and_node',
-                               lambda: (source2, node)):
+                               side_effect=mock_get_source_and_node):
             client_chunks = list(app_iter)
-        self.assertEqual(client_chunks, ['abcd1234', 'efgh5678'])
+        self.assertEqual(range_headers, ['bytes=8-27', 'bytes=16-27'])
+        self.assertEqual(client_chunks, [
+            'abcd1234', 'efgh5678', 'lotsmore', 'data'])
 
     def test_client_chunk_size_resuming_chunked(self):
author	Tim Burke <tim.burke@gmail.com>	2017-09-15 22:52:26 +0000
committer	Tim Burke <tim.burke@gmail.com>	2017-09-18 19:07:41 +0000
commit	35ab6a5da363aa10c8fa88df0a388ad6518164e7 (patch)
tree	febf368a62cdad2ce77b7c8927a567fdb77e11b9
parent	69c715c505cf9e5df29dc1dff2fa1a4847471cb6 (diff)
download	swift-35ab6a5da363aa10c8fa88df0a388ad6518164e7.tar.gz