diff options
author | Pavel Kvasnička <pavel.kvasnicka@firma.seznam.cz> | 2017-03-22 09:59:53 +0100 |
---|---|---|
committer | Ondřej Nový <ondrej.novy@firma.seznam.cz> | 2017-04-12 09:26:52 +0200 |
commit | 1582f8a559ac03be7d3976558e66339d8c3a1811 (patch) | |
tree | e3af1b3eff824a3bf20aeefe232ab14d854a26d8 | |
parent | b7c2a9196009ef78560acfb3826b9106f95d7fdf (diff) | |
download | swift-1582f8a559ac03be7d3976558e66339d8c3a1811.tar.gz |
Container drive error results double space usage on rest drives
When drive with container or account database is unmounted
replicator pushes database to handoff location. But this
handoff location finds replica with unmounted drive and
pushes database to the *next* handoff until all handoffs has
a replica - all container/account servers has replicas of
all unmounted drives.
This patch solves:
- Consumption of iterator on handoff location that results in
replication to the next and next handoff.
- StopIteration exception stopped not finished loop over
available handoffs if no more nodes exists for db replication
candidency.
Regression was introduced in 2.4.0 with rsync compression.
Co-Author: Kota Tsuyuzaki <tsuyuzaki.kota@lab.ntt.co.jp>
Change-Id: I344f9daaa038c6946be11e1cf8c4ef104a09e68b
Closes-Bug: 1675500
-rw-r--r-- | swift/common/db_replicator.py | 10 | ||||
-rw-r--r-- | test/unit/common/test_db_replicator.py | 44 |
2 files changed, 52 insertions, 2 deletions
diff --git a/swift/common/db_replicator.py b/swift/common/db_replicator.py index f9ca249ce..30c6a35c1 100644 --- a/swift/common/db_replicator.py +++ b/swift/common/db_replicator.py @@ -543,7 +543,7 @@ class Replicator(Daemon): more_nodes = self.ring.get_more_nodes(int(partition)) if not local_dev: # Check further if local device is a handoff node - for node in more_nodes: + for node in self.ring.get_more_nodes(int(partition)): if node['id'] == node_id: local_dev = node break @@ -559,7 +559,13 @@ class Replicator(Daemon): success = self._repl_to_node(node, broker, partition, info, different_region) except DriveNotMounted: - repl_nodes.append(next(more_nodes)) + try: + repl_nodes.append(next(more_nodes)) + except StopIteration: + self.logger.error( + _('ERROR There are not enough handoff nodes to reach ' + 'replica count for partition %s'), + partition) self.logger.error(_('ERROR Remote drive not mounted %s'), node) except (Exception, Timeout): self.logger.exception(_('ERROR syncing %(file)s with node' diff --git a/test/unit/common/test_db_replicator.py b/test/unit/common/test_db_replicator.py index 50d031cb0..50f83c2d7 100644 --- a/test/unit/common/test_db_replicator.py +++ b/test/unit/common/test_db_replicator.py @@ -643,6 +643,50 @@ class TestDBReplicator(unittest.TestCase): replicator._replicate_object('0', '/path/to/file', 'node_id') self.assertEqual(['/path/to/file'], self.delete_db_calls) + def test_replicate_object_with_exception(self): + replicator = TestReplicator({}) + replicator.ring = FakeRingWithNodes().Ring('path') + replicator.brokerclass = FakeAccountBroker + replicator.delete_db = self.stub_delete_db + replicator._repl_to_node = mock.Mock(side_effect=Exception()) + replicator._replicate_object('0', '/path/to/file', + replicator.ring.devs[0]['id']) + self.assertEqual(2, replicator._repl_to_node.call_count) + # with one DriveNotMounted exception called on +1 more replica + replicator._repl_to_node = mock.Mock(side_effect=[DriveNotMounted()]) + replicator._replicate_object('0', '/path/to/file', + replicator.ring.devs[0]['id']) + self.assertEqual(3, replicator._repl_to_node.call_count) + # called on +1 more replica and self when *first* handoff + replicator._repl_to_node = mock.Mock(side_effect=[DriveNotMounted()]) + replicator._replicate_object('0', '/path/to/file', + replicator.ring.devs[3]['id']) + self.assertEqual(4, replicator._repl_to_node.call_count) + # even if it's the last handoff it works to keep 3 replicas + # 2 primaries + 1 handoff + replicator._repl_to_node = mock.Mock(side_effect=[DriveNotMounted()]) + replicator._replicate_object('0', '/path/to/file', + replicator.ring.devs[-1]['id']) + self.assertEqual(4, replicator._repl_to_node.call_count) + # with two DriveNotMounted exceptions called on +2 more replica keeping + # durability + replicator._repl_to_node = mock.Mock( + side_effect=[DriveNotMounted()] * 2) + replicator._replicate_object('0', '/path/to/file', + replicator.ring.devs[0]['id']) + self.assertEqual(4, replicator._repl_to_node.call_count) + + def test_replicate_object_with_exception_run_out_of_nodes(self): + replicator = TestReplicator({}) + replicator.ring = FakeRingWithNodes().Ring('path') + replicator.brokerclass = FakeAccountBroker + replicator.delete_db = self.stub_delete_db + # all other devices are not mounted + replicator._repl_to_node = mock.Mock(side_effect=DriveNotMounted()) + replicator._replicate_object('0', '/path/to/file', + replicator.ring.devs[0]['id']) + self.assertEqual(5, replicator._repl_to_node.call_count) + def test_replicate_account_out_of_place(self): replicator = TestReplicator({}, logger=unit.FakeLogger()) replicator.ring = FakeRingWithNodes().Ring('path') |