From 09bdc87cbc1e7bc1918f9b5094bec266b6761d75 Mon Sep 17 00:00:00 2001
From: Christian Schwede
Date: Thu, 11 Sep 2014 08:01:51 +0000
Subject: Return correct number of changed partitions

When a ring is rebalanced the number of changed partitions is counted.
Before this patch partitions might be rebalanced, but actually no data
is moved - for example, when a partition is assigned to the same device
as before. This results in a wrong number of reassigned partitions that
is shown to the user.

This patch remembers the partition allocation before the rebalance, and
compares it to the new allocation after a rebalance. Only partitions
that are stored on a different device than before are counted.

Partial-Bug: 1367826
Also-By: Florent Flament
Change-Id: Iacfd514df3af351791f9191cef78cff1b3e2645f
---
 swift/common/ring/builder.py          | 31 +++++++++++++++++++++++++------
 test/unit/common/ring/test_builder.py |  8 ++++++--
 2 files changed, 31 insertions(+), 8 deletions(-)

diff --git a/swift/common/ring/builder.py b/swift/common/ring/builder.py
index 0250c264e..f3096f6e0 100644
--- a/swift/common/ring/builder.py
+++ b/swift/common/ring/builder.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 
 import bisect
+import copy
 import itertools
 import math
 import random
@@ -330,6 +331,7 @@ class RingBuilder(object):
 
         :returns: (number_of_partitions_altered, resulting_balance)
         """
+        old_replica2part2dev = copy.deepcopy(self._replica2part2dev)
         if seed is not None:
             random.seed(seed)
 
@@ -339,29 +341,46 @@ class RingBuilder(object):
             self._initial_balance()
             self.devs_changed = False
             return self.parts, self.get_balance()
-        retval = 0
+        changed_parts = 0
         self._update_last_part_moves()
         last_balance = 0
         new_parts, removed_part_count = self._adjust_replica2part2dev_size()
-        retval += removed_part_count
+        changed_parts += removed_part_count
         if new_parts or removed_part_count:
             self._set_parts_wanted()
             self._reassign_parts(new_parts)
-            retval += len(new_parts)
+            changed_parts += len(new_parts)
         while True:
             reassign_parts = self._gather_reassign_parts()
             self._reassign_parts(reassign_parts)
-            retval += len(reassign_parts)
+            changed_parts += len(reassign_parts)
             while self._remove_devs:
                 self.devs[self._remove_devs.pop()['id']] = None
             balance = self.get_balance()
             if balance < 1 or abs(last_balance - balance) < 1 or \
-                    retval == self.parts:
+                    changed_parts == self.parts:
                 break
             last_balance = balance
         self.devs_changed = False
         self.version += 1
-        return retval, balance
+
+        # Compare the partition allocation before and after the rebalance
+        # Only changed device ids are taken into account; devices might be
+        # "touched" during the rebalance, but actually not really moved
+        changed_parts = 0
+        for rep_id, _rep in enumerate(self._replica2part2dev):
+            for part_id, new_device in enumerate(_rep):
+                # IndexErrors will be raised if the replicas are increased or
+                # decreased, and that actually means the partition has changed
+                try:
+                    old_device = old_replica2part2dev[rep_id][part_id]
+                except IndexError:
+                    changed_parts += 1
+                    continue
+
+                if old_device != new_device:
+                    changed_parts += 1
+        return changed_parts, balance
 
     def validate(self, stats=False):
         """
diff --git a/test/unit/common/ring/test_builder.py b/test/unit/common/ring/test_builder.py
index de05480a5..6476b4cf8 100644
--- a/test/unit/common/ring/test_builder.py
+++ b/test/unit/common/ring/test_builder.py
@@ -703,14 +703,14 @@ class TestRingBuilder(unittest.TestCase):
                     'ip': '127.0.0.1', 'port': 10000, 'device': 'sda1'})
         rb.add_dev({'id': 1, 'region': 0, 'zone': 1, 'weight': 2,
                     'ip': '127.0.0.1', 'port': 10001, 'device': 'sda1'})
-        rb.rebalance()
+        rb.rebalance(seed=2)
 
         rb.add_dev({'id': 2, 'region': 1, 'zone': 0, 'weight': 0.25,
                     'ip': '127.0.0.1', 'port': 10003, 'device': 'sda1'})
         rb.add_dev({'id': 3, 'region': 1, 'zone': 1, 'weight': 0.25,
                     'ip': '127.0.0.1', 'port': 10004, 'device': 'sda1'})
         rb.pretend_min_part_hours_passed()
-        rb.rebalance(seed=2)
+        changed_parts, _balance = rb.rebalance(seed=2)
 
         # there's not enough room in r1 for every partition to have a replica
         # in it, so only 86 assignments occur in r1 (that's ~1/5 of the total,
@@ -718,6 +718,10 @@ class TestRingBuilder(unittest.TestCase):
         population_by_region = self._get_population_by_region(rb)
         self.assertEquals(population_by_region, {0: 682, 1: 86})
 
+        # Rebalancing will reassign 143 of the partitions, which is ~1/5
+        # of the total amount of partitions (3*256)
+        self.assertEqual(143, changed_parts)
+
         # and since there's not enough room, subsequent rebalances will not
         # cause additional assignments to r1
         rb.pretend_min_part_hours_passed()
--
cgit v1.2.1
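
Illustrative sketch (not part of the commit above): the snippet below isolates
the counting logic this patch appends to the end of RingBuilder.rebalance().
The helper name count_changed_parts and the toy nested lists are hypothetical
stand-ins for the builder's _replica2part2dev arrays (one row per replica, one
device id per partition slot); the real code runs inline in rebalance() and
compares against a deepcopy taken before the reassignment work starts.

    # Hypothetical, standalone sketch of the before/after comparison; not part
    # of Swift itself. Plain nested lists stand in for _replica2part2dev.
    def count_changed_parts(old_replica2part2dev, new_replica2part2dev):
        """Count partitions whose assigned device differs from the old table."""
        changed_parts = 0
        for rep_id, replica in enumerate(new_replica2part2dev):
            for part_id, new_device in enumerate(replica):
                try:
                    old_device = old_replica2part2dev[rep_id][part_id]
                except IndexError:
                    # A replica row or partition slot with no counterpart in
                    # the old table has necessarily changed.
                    changed_parts += 1
                    continue
                if old_device != new_device:
                    changed_parts += 1
        return changed_parts

    if __name__ == '__main__':
        old = [[0, 1, 2, 3],
               [1, 2, 3, 0]]
        new = [[0, 1, 3, 3],      # partition 2 moved from device 2 to device 3
               [1, 2, 3, 0],      # untouched replica row
               [2, 3, 0, 1]]      # brand-new replica row: all four parts count
        print(count_changed_parts(old, new))  # prints 5

The IndexError handling is the detail worth noting: if a rebalance grows the
replica count or the partition count, a new slot has no counterpart in the old
table, so it is simply counted as changed rather than skipped.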