summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCharles Hsu <charles0126@gmail.com>2016-08-11 00:53:13 +0800
committerTim Burke <tim.burke@gmail.com>2016-08-31 17:18:23 +0000
commit2c39cc9990fc2a2f3fac9a15c79c10e8fdb1b1fc (patch)
tree22b22629a4224b95616653eb8e8f9c6b401cfd2b
parentd75e6d39789e1cc83ac0614f5f4697e1df2f1f57 (diff)
downloadswift-2c39cc9990fc2a2f3fac9a15c79c10e8fdb1b1fc.tar.gz
Ignore auditor status files to prevent replicator reports errors
Ignore `auditor_status_*.json` files during the collecting jobs and replicator won't use these wrong paths to find objects that causes an exception to increase failure count in replicator report. Co-Authored-By: Clay Gerrard <clay.gerrard@gmail.com> Co-Authored-By: Mark Kirkwood <mark.kirkwood@catalyst.net.nz> Change-Id: Ib15a0987288d9ee32432c1998aefe638ca3b223b Closes-Bug: #1583305 (cherry picked from commit 65b1820407ea40bd7d65a5356a58a689befe3cb5)
-rw-r--r--swift/obj/replicator.py9
-rw-r--r--test/unit/obj/test_replicator.py37
2 files changed, 42 insertions, 4 deletions
diff --git a/swift/obj/replicator.py b/swift/obj/replicator.py
index ff89b3213..104f5c396 100644
--- a/swift/obj/replicator.py
+++ b/swift/obj/replicator.py
@@ -356,12 +356,12 @@ class ObjectReplicator(Daemon):
handoff_partition_deleted = True
except (Exception, Timeout):
self.logger.exception(_("Error syncing handoff partition"))
+ self._add_failure_stats(failure_devs_info)
finally:
target_devs_info = set([(target_dev['replication_ip'],
target_dev['device'])
for target_dev in job['nodes']])
self.stats['success'] += len(target_devs_info - failure_devs_info)
- self._add_failure_stats(failure_devs_info)
if not handoff_partition_deleted:
self.handoffs_remaining += 1
self.partition_times.append(time.time() - begin)
@@ -489,10 +489,10 @@ class ObjectReplicator(Daemon):
self.suffix_count += len(local_hash)
except (Exception, Timeout):
failure_devs_info.update(target_devs_info)
+ self._add_failure_stats(failure_devs_info)
self.logger.exception(_("Error syncing partition"))
finally:
self.stats['success'] += len(target_devs_info - failure_devs_info)
- self._add_failure_stats(failure_devs_info)
self.partition_times.append(time.time() - begin)
self.logger.timing_since('partition.update.timing', begin)
@@ -611,6 +611,11 @@ class ObjectReplicator(Daemon):
and partition not in override_partitions):
continue
+ if (partition.startswith('auditor_status_') and
+ partition.endswith('.json')):
+ # ignore auditor status files
+ continue
+
part_nodes = None
try:
job_path = join(obj_path, partition)
diff --git a/test/unit/obj/test_replicator.py b/test/unit/obj/test_replicator.py
index a32fa9980..413c63149 100644
--- a/test/unit/obj/test_replicator.py
+++ b/test/unit/obj/test_replicator.py
@@ -234,7 +234,7 @@ class TestObjectReplicator(unittest.TestCase):
config,
))
- def _write_disk_data(self, disk_name):
+ def _write_disk_data(self, disk_name, with_json=False):
os.mkdir(os.path.join(self.devices, disk_name))
objects = os.path.join(self.devices, disk_name,
diskfile.get_data_dir(POLICIES[0]))
@@ -250,6 +250,13 @@ class TestObjectReplicator(unittest.TestCase):
parts_1[part] = os.path.join(objects_1, part)
os.mkdir(parts_1[part])
+ if with_json:
+ for json_file in ['auditor_status_ZBF.json',
+ 'auditor_status_ALL.json']:
+ for obj_dir in [objects, objects_1]:
+ with open(os.path.join(obj_dir, json_file), 'w'):
+ pass
+
return objects, objects_1, parts, parts_1
def _create_replicator(self):
@@ -417,6 +424,32 @@ class TestObjectReplicator(unittest.TestCase):
self.assertEqual(jobs_by_pol_part[part]['path'],
os.path.join(self.objects_1, part[1:]))
+ def test_collect_jobs_failure_report_with_auditor_stats_json(self):
+ devs = [
+ {'id': 0, 'device': 'sda', 'zone': 0,
+ 'region': 1, 'ip': '1.1.1.1', 'port': 1111,
+ 'replication_ip': '127.0.0.0', 'replication_port': 6000},
+ {'id': 1, 'device': 'sdb', 'zone': 1,
+ 'region': 1, 'ip': '1.1.1.1', 'port': 1111,
+ 'replication_ip': '127.0.0.0', 'replication_port': 6000},
+ {'id': 2, 'device': 'sdc', 'zone': 2,
+ 'region': 1, 'ip': '1.1.1.1', 'port': 1111,
+ 'replication_ip': '127.0.0.1', 'replication_port': 6000},
+ {'id': 3, 'device': 'sdd', 'zone': 3,
+ 'region': 1, 'ip': '1.1.1.1', 'port': 1111,
+ 'replication_ip': '127.0.0.1', 'replication_port': 6000},
+ ]
+ objects_sdb, objects_1_sdb, _, _ = \
+ self._write_disk_data('sdb', with_json=True)
+ objects_sdc, objects_1_sdc, _, _ = \
+ self._write_disk_data('sdc', with_json=True)
+ objects_sdd, objects_1_sdd, _, _ = \
+ self._write_disk_data('sdd', with_json=True)
+ _create_test_rings(self.testdir, devs)
+
+ self.replicator.collect_jobs()
+ self.assertEqual(self.replicator.stats['failure'], 0)
+
@mock.patch('swift.obj.replicator.random.shuffle', side_effect=lambda l: l)
def test_collect_jobs_multi_disk(self, mock_shuffle):
devs = [
@@ -1555,7 +1588,7 @@ class TestObjectReplicator(unittest.TestCase):
return 2, {'abc': 'def'}
def fake_exc(tester, *args, **kwargs):
- if 'Error syncing partition' in args[0]:
+ if 'Error syncing partition timeout' in args[0]:
tester.i_failed = True
self.i_failed = False