author     Alistair Coles <alistairncoles@gmail.com>   2021-01-06 16:22:26 +0000
committer  Pete Zaitcev <zaitcev@kotori.zaitcev.us>    2021-02-23 21:00:52 -0600
commit     e69b3c60feb8c60cae5ec27508fd6f860d8c14e0 (patch)
tree       13ae50603d0b849010b780a5cb35a38fe51e1daf
parent     18cdf9b5680b6d9a2201e9781771e3a5efac0355 (diff)
Avoid loops when gathering container listings from shards
Previously the proxy container controller could, in corner cases, get
into a loop while building a listing for a sharded container. For
example, if a root has a single shard then the proxy will be redirected
to that shard, but if that shard has shrunk into the root then it will
redirect the proxy back to the root, and so on until the root is
updated with the shard's shrunken status.

There is already a guard to prevent the proxy fetching shard ranges
again from the same container that it is *currently* querying for
listing parts. That deals with the case when a container fills in gaps
in its listing shard ranges with a reference to itself.

This patch extends that guard to prevent the proxy fetching shard
ranges again from any container that has previously been queried for
listing parts.

Cherry-Picked-From: I7dc793f0ec65236c1278fd93d6b1f17c2db98d7b
Change-Id: I6cff16a00e48c4d069000dd54c6f4cb9f02f773d
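As a minimal standalone sketch of the guard described above (illustration
only, not the patch itself: choose_record_type and its signature are
invented here), the idea is to keep a per-request history of containers
that have served shard ranges and to force an object listing on any
repeat visit:

    # Hypothetical sketch of the loop guard; not the controller's real flow.
    def choose_record_type(environ, account, container):
        # (account, container) pairs that have already served shard ranges
        # for this listing request
        history = environ.setdefault('swift.shard_listing_history', [])
        if (account, container) in history:
            # revisiting this container would loop; fetch objects instead
            return 'object'
        history.append((account, container))
        return 'auto'

    # the shrink-to-root scenario from the message above then plays out as:
    environ = {}
    choose_record_type(environ, 'a', 'c')              # 'auto': root serves shard range
    choose_record_type(environ, '.shards_a', 'c_xyz')  # 'auto': shard serves root's range
    choose_record_type(environ, 'a', 'c')              # 'object': loop broken at root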
-rw-r--r--  swift/common/wsgi.py                           |   3
-rw-r--r--  swift/proxy/controllers/container.py           |  18
-rw-r--r--  test/unit/proxy/controllers/test_container.py  | 162
3 files changed, 178 insertions(+), 5 deletions(-)
diff --git a/swift/common/wsgi.py b/swift/common/wsgi.py
index f9cfd1f2f..11893254d 100644
--- a/swift/common/wsgi.py
+++ b/swift/common/wsgi.py
@@ -1452,7 +1452,8 @@ def make_env(env, method=None, path=None, agent='Swift', query_string=None,
                  'SERVER_PROTOCOL', 'swift.cache', 'swift.source',
                  'swift.trans_id', 'swift.authorize_override',
                  'swift.authorize', 'HTTP_X_USER_ID', 'HTTP_X_PROJECT_ID',
-                 'HTTP_REFERER', 'swift.infocache'):
+                 'HTTP_REFERER', 'swift.infocache',
+                 'swift.shard_listing_history'):
         if name in env:
             newenv[name] = env[name]
     if method:
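This copy matters because each listing GET to a shard is issued as a
subrequest whose environ is rebuilt via make_env(); without whitelisting
the new key, the history would be dropped on every hop. A hedged
illustration (assuming make_env keeps its current minimal requirements):

    from swift.common.wsgi import make_env

    env = {'REQUEST_METHOD': 'GET',
           'swift.shard_listing_history': [('a', 'c')]}
    newenv = make_env(env, method='GET', path='/v1/.shards_a/c_xyz')
    # with this patch applied, the history survives into the subrequest
    assert newenv['swift.shard_listing_history'] == [('a', 'c')]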
diff --git a/swift/proxy/controllers/container.py b/swift/proxy/controllers/container.py
index 2235db757..be66f3abc 100644
--- a/swift/proxy/controllers/container.py
+++ b/swift/proxy/controllers/container.py
@@ -148,7 +148,19 @@ class ContainerController(Controller):
         return resp
 
     def _get_from_shards(self, req, resp):
-        # construct listing using shards described by the response body
+        # Construct listing using shards described by the response body.
+        # The history of containers that have returned shard ranges is
+        # maintained in the request environ so that loops can be avoided by
+        # forcing an object listing if the same container is visited again.
+        # This can happen in at least two scenarios:
+        #  1. a container has filled a gap in its shard ranges with a
+        #     shard range pointing to itself
+        #  2. a root container returns a (stale) shard range pointing to a
+        #     shard that has shrunk into the root, in which case the shrunken
+        #     shard may return the root's shard range.
+        shard_listing_history = req.environ.setdefault(
+            'swift.shard_listing_history', [])
+        shard_listing_history.append((self.account_name, self.container_name))
         shard_ranges = [ShardRange.from_dict(data)
                         for data in json.loads(resp.body)]
         self.app.logger.debug('GET listing from %s shards for: %s',
@@ -195,8 +207,8 @@ class ContainerController(Controller):
             else:
                 params['end_marker'] = str_to_wsgi(shard_range.end_marker)
 
-            if (shard_range.account == self.account_name and
-                    shard_range.container == self.container_name):
+            if ((shard_range.account, shard_range.container) in
+                    shard_listing_history):
                 # directed back to same container - force GET of objects
                 headers = {'X-Backend-Record-Type': 'object'}
             else:
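With the widened guard, the membership test above covers not just the
container currently being queried but every container visited earlier in
the listing. Reusing the hypothetical choose_record_type sketch from
above (names illustrative only), the shard-to-shard bounce exercised by
the second new test resolves as:

    environ = {}
    choose_record_type(environ, 'a', 'c')            # 'auto': root lists its shards
    choose_record_type(environ, '.shards_a', 'c_b')  # 'auto': 2nd shard points to 3rd
    choose_record_type(environ, '.shards_a', 'c_')   # 'auto': 3rd shard points back to 2nd
    choose_record_type(environ, '.shards_a', 'c_b')  # 'object': repeat visit, loop broken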
diff --git a/test/unit/proxy/controllers/test_container.py b/test/unit/proxy/controllers/test_container.py
index ba0e4fff1..408f4a15b 100644
--- a/test/unit/proxy/controllers/test_container.py
+++ b/test/unit/proxy/controllers/test_container.py
@@ -501,7 +501,7 @@ class TestContainerController(TestRingBase):
             self.assertEqual(dict(exp_params, format='json'), got_params)
             for k, v in exp_headers.items():
                 self.assertIn(k, req['headers'])
-                self.assertEqual(v, req['headers'][k])
+                self.assertEqual(v, req['headers'][k], k)
             self.assertNotIn('X-Backend-Override-Delete', req['headers'])
         return resp
@@ -938,6 +938,166 @@ class TestContainerController(TestRingBase):
             query_string='?delimiter=/')
         self.check_response(resp, root_resp_hdrs)
 
+    def test_GET_sharded_container_shard_redirects_to_root(self):
+        # check that if the root redirects listing to a shard, but the shard
+        # returns the root shard (e.g. it was the final shard to shrink into
+        # the root) objects are requested from the root, rather than a loop.
+
+        # single shard spanning entire namespace
+        shard_sr = ShardRange('.shards_a/c_xyz', Timestamp.now(), '', '')
+        all_objects = self._make_shard_objects(shard_sr)
+        size_all_objects = sum([obj['bytes'] for obj in all_objects])
+        num_all_objects = len(all_objects)
+        limit = CONTAINER_LISTING_LIMIT
+
+        # when shrinking the final shard will return the root shard range into
+        # which it is shrinking
+        shard_resp_hdrs = {
+            'X-Backend-Sharding-State': 'sharded',
+            'X-Container-Object-Count': 0,
+            'X-Container-Bytes-Used': 0,
+            'X-Backend-Storage-Policy-Index': 0,
+            'X-Backend-Record-Type': 'shard'
+        }
+
+        # root still thinks it has a shard
+        root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded',
+                          'X-Backend-Timestamp': '99',
+                          'X-Container-Object-Count': num_all_objects,
+                          'X-Container-Bytes-Used': size_all_objects,
+                          'X-Backend-Storage-Policy-Index': 0}
+        root_shard_resp_hdrs = dict(root_resp_hdrs)
+        root_shard_resp_hdrs['X-Backend-Record-Type'] = 'shard'
+
+        root_sr = ShardRange('a/c', Timestamp.now(), '', '')
+        mock_responses = [
+            # status, body, headers
+            (200, [dict(shard_sr)], root_shard_resp_hdrs),  # from root
+            (200, [dict(root_sr)], shard_resp_hdrs),  # from shard
+            (200, all_objects, root_resp_hdrs),  # from root
+        ]
+        expected_requests = [
+            # path, headers, params
+            # first request to root should specify auto record type
+            ('a/c', {'X-Backend-Record-Type': 'auto'},
+             dict(states='listing')),
+            # request to shard should specify auto record type
+            (wsgi_quote(str_to_wsgi(shard_sr.name)),
+             {'X-Backend-Record-Type': 'auto'},
+             dict(marker='', end_marker='', limit=str(limit),
+                  states='listing')),  # 200
+            # second request to root should specify object record type
+            ('a/c', {'X-Backend-Record-Type': 'object'},
+             dict(marker='', end_marker='', limit=str(limit))),  # 200
+        ]
+
+        expected_objects = all_objects
+        resp = self._check_GET_shard_listing(
+            mock_responses, expected_objects, expected_requests)
+        self.check_response(resp, root_resp_hdrs,
+                            expected_objects=expected_objects)
+        self.assertEqual(
+            [('a', 'c'), ('.shards_a', 'c_xyz')],
+            resp.request.environ.get('swift.shard_listing_history'))
+
+    def test_GET_sharded_container_shard_redirects_between_shards(self):
+        # check that if one shard redirects listing to another shard that
+        # somehow redirects listing back to the first shard, then we will break
+        # out of the loop (this isn't an expected scenario, but could perhaps
+        # happen if multiple conflicting shard-shrinking decisions are made)
+        shard_bounds = ('', 'a', 'b', '')
+        shard_ranges = [
+            ShardRange('.shards_a/c_%s' % upper, Timestamp.now(), lower, upper)
+            for lower, upper in zip(shard_bounds[:-1], shard_bounds[1:])]
+        self.assertEqual([
+            '.shards_a/c_a',
+            '.shards_a/c_b',
+            '.shards_a/c_',
+        ], [sr.name for sr in shard_ranges])
+        sr_dicts = [dict(sr) for sr in shard_ranges]
+        sr_objs = [self._make_shard_objects(sr) for sr in shard_ranges]
+        all_objects = []
+        for objects in sr_objs:
+            all_objects.extend(objects)
+        size_all_objects = sum([obj['bytes'] for obj in all_objects])
+        num_all_objects = len(all_objects)
+
+        root_resp_hdrs = {'X-Backend-Sharding-State': 'sharded',
+                          'X-Backend-Timestamp': '99',
+                          'X-Container-Object-Count': num_all_objects,
+                          'X-Container-Bytes-Used': size_all_objects,
+                          'X-Backend-Storage-Policy-Index': 0,
+                          'X-Backend-Record-Type': 'shard',
+                          }
+        shard_resp_hdrs = {'X-Backend-Sharding-State': 'unsharded',
+                           'X-Container-Object-Count': 2,
+                           'X-Container-Bytes-Used': 4,
+                           'X-Backend-Storage-Policy-Index': 0}
+        shrinking_resp_hdrs = {
+            'X-Backend-Sharding-State': 'sharded',
+            'X-Backend-Record-Type': 'shard',
+        }
+        limit = CONTAINER_LISTING_LIMIT
+
+        mock_responses = [
+            # status, body, headers
+            (200, sr_dicts, root_resp_hdrs),  # from root
+            (200, sr_objs[0], shard_resp_hdrs),  # objects from 1st shard
+            (200, [sr_dicts[2]], shrinking_resp_hdrs),  # 2nd points to 3rd
+            (200, [sr_dicts[1]], shrinking_resp_hdrs),  # 3rd points to 2nd
+            (200, sr_objs[1], shard_resp_hdrs),  # objects from 2nd
+            (200, sr_objs[2], shard_resp_hdrs),  # objects from 3rd
+        ]
+        expected_requests = [
+            # each list item is tuple (path, headers, params)
+            # request to root
+            # context GET(a/c)
+            ('a/c', {'X-Backend-Record-Type': 'auto'},
+             dict(states='listing')),
+            # request to 1st shard as per shard list from root;
+            # context GET(a/c);
+            # end_marker dictated by 1st shard range upper bound
+            ('.shards_a/c_a', {'X-Backend-Record-Type': 'auto'},
+             dict(marker='', end_marker='a\x00', states='listing',
+                  limit=str(limit))),  # 200
+            # request to 2nd shard as per shard list from root;
+            # context GET(a/c);
+            # end_marker dictated by 2nd shard range upper bound
+            ('.shards_a/c_b', {'X-Backend-Record-Type': 'auto'},
+             dict(marker='a', end_marker='b\x00', states='listing',
+                  limit=str(limit - len(sr_objs[0])))),
+            # request to 3rd shard as per shard list from *2nd shard*;
+            # new context GET(a/c)->GET(.shards_a/c_b);
+            # end_marker still dictated by 2nd shard range upper bound
+            ('.shards_a/c_', {'X-Backend-Record-Type': 'auto'},
+             dict(marker='a', end_marker='b\x00', states='listing',
+                  limit=str(
+                      limit - len(sr_objs[0])))),
+            # request to 2nd shard as per shard list from *3rd shard*; this one
+            # should specify record type object;
+            # new context GET(a/c)->GET(.shards_a/c_b)->GET(.shards_a/c_);
+            # end_marker still dictated by 2nd shard range upper bound
+            ('.shards_a/c_b', {'X-Backend-Record-Type': 'object'},
+             dict(marker='a', end_marker='b\x00',
+                  limit=str(
+                      limit - len(sr_objs[0])))),
+            # request to 3rd shard *as per shard list from root*; this one
+            # should specify record type object;
+            # context GET(a/c);
+            # end_marker dictated by 3rd shard range upper bound
+            ('.shards_a/c_', {'X-Backend-Record-Type': 'object'},
+             dict(marker='b', end_marker='',
+                  limit=str(
+                      limit - len(sr_objs[0]) - len(sr_objs[1])))),  # 200
+        ]
+        resp = self._check_GET_shard_listing(
+            mock_responses, all_objects, expected_requests)
+        self.check_response(resp, root_resp_hdrs,
+                            expected_objects=all_objects)
+        self.assertEqual(
+            [('a', 'c'), ('.shards_a', 'c_b'), ('.shards_a', 'c_')],
+            resp.request.environ.get('swift.shard_listing_history'))
+
     def test_GET_sharded_container_overlapping_shards(self):
         # verify ordered listing even if unexpected overlapping shard ranges
         shard_bounds = (('', 'ham', ShardRange.CLEAVED),