diff options
-rw-r--r-- | doc/manpages/swift-recon.1 | 2 | ||||
-rw-r--r-- | doc/source/admin_guide.rst | 3 | ||||
-rw-r--r-- | swift/cli/recon.py | 118 | ||||
-rw-r--r-- | swift/common/middleware/recon.py | 9 | ||||
-rw-r--r-- | test/unit/cli/test_recon.py | 83 | ||||
-rw-r--r-- | test/unit/common/middleware/test_recon.py | 26 |
6 files changed, 202 insertions, 39 deletions
diff --git a/doc/manpages/swift-recon.1 b/doc/manpages/swift-recon.1 index c5a30b6d4..dc5a19d00 100644 --- a/doc/manpages/swift-recon.1 +++ b/doc/manpages/swift-recon.1 @@ -58,6 +58,8 @@ Get updater stats Get expirer stats .IP "\fB-r, --replication\fR" Get replication stats +.IP "\fB-R, --reconstruction\fR" +Get reconstruction stats .IP "\fB-u, --unmounted\fR" Check cluster for unmounted devices .IP "\fB-d, --diskusage\fR" diff --git a/doc/source/admin_guide.rst b/doc/source/admin_guide.rst index d44f67138..c3f4a1078 100644 --- a/doc/source/admin_guide.rst +++ b/doc/source/admin_guide.rst @@ -764,7 +764,7 @@ This information can also be queried via the swift-recon command line utility:: fhines@ubuntu:~$ swift-recon -h Usage: usage: swift-recon <server_type> [-v] [--suppress] [-a] [-r] [-u] [-d] - [-l] [-T] [--md5] [--auditor] [--updater] [--expirer] [--sockstat] + [-R] [-l] [-T] [--md5] [--auditor] [--updater] [--expirer] [--sockstat] <server_type> account|container|object Defaults to object server. @@ -778,6 +778,7 @@ This information can also be queried via the swift-recon command line utility:: --suppress Suppress most connection related errors -a, --async Get async stats -r, --replication Get replication stats + -R, --reconstruction Get reconstruction stats --auditor Get auditor stats --updater Get updater stats --expirer Get expirer stats diff --git a/swift/cli/recon.py b/swift/cli/recon.py index 63f8afb3f..cd0952875 100644 --- a/swift/cli/recon.py +++ b/swift/cli/recon.py @@ -431,6 +431,78 @@ class SwiftRecon(object): print("[%s] - No hosts returned valid data." % k) print("=" * 79) + def _calculate_least_and_most_recent(self, url_time_data): + """calculate and print the least and most recent urls + + Given a list of url and time tuples calculate the most and least + recent timings and print it out. + :param url_time_data: list of url and time tuples: [(url, time_), ..] 
+ """ + least_recent_time = 9999999999 + least_recent_url = None + most_recent_time = 0 + most_recent_url = None + + for url, last in url_time_data: + if last is None: + continue + if last < least_recent_time: + least_recent_time = last + least_recent_url = url + if last > most_recent_time: + most_recent_time = last + most_recent_url = url + + if least_recent_url is not None: + host = urlparse(least_recent_url).netloc + if not least_recent_time: + print('Oldest completion was NEVER by %s.' % host) + else: + elapsed = time.time() - least_recent_time + elapsed, elapsed_unit = seconds2timeunit(elapsed) + print('Oldest completion was %s (%d %s ago) by %s.' % ( + self._ptime(least_recent_time), + elapsed, elapsed_unit, host)) + if most_recent_url is not None: + host = urlparse(most_recent_url).netloc + elapsed = time.time() - most_recent_time + elapsed, elapsed_unit = seconds2timeunit(elapsed) + print('Most recent completion was %s (%d %s ago) by %s.' % ( + self._ptime(most_recent_time), + elapsed, elapsed_unit, host)) + + def reconstruction_check(self, hosts): + """ + Obtain and print reconstruction statistics + + :param hosts: set of hosts to check. 
in the format of: + set([('127.0.0.1', 6020), ('127.0.0.2', 6030)]) + """ + stats = [] + last_stats = [] + recon = Scout("reconstruction/%s" % self.server_type, self.verbose, + self.suppress_errors, self.timeout) + print("[%s] Checking on reconstructors" % self._ptime()) + for url, response, status, ts_start, ts_end in self.pool.imap( + recon.scout, hosts): + if status == 200: + stats.append(response.get('object_reconstruction_time')) + last = response.get('object_reconstruction_last', 0) + last_stats.append((url, last)) + if stats: + computed = self._gen_stats(stats, + name='object_reconstruction_time') + if computed['reported'] > 0: + self._print_stats(computed) + else: + print("[object_reconstruction_time] - No hosts returned " + "valid data.") + else: + print("[object_reconstruction_time] - No hosts returned " + "valid data.") + self._calculate_least_and_most_recent(last_stats) + print("=" * 79) + def replication_check(self, hosts): """ Obtain and print replication statistics @@ -440,13 +512,10 @@ class SwiftRecon(object): """ stats = {'replication_time': [], 'failure': [], 'success': [], 'attempted': []} + last_stats = [] recon = Scout("replication/%s" % self.server_type, self.verbose, self.suppress_errors, self.timeout) print("[%s] Checking on replication" % self._ptime()) - least_recent_time = 9999999999 - least_recent_url = None - most_recent_time = 0 - most_recent_url = None for url, response, status, ts_start, ts_end in self.pool.imap( recon.scout, hosts): if status == 200: @@ -459,14 +528,7 @@ class SwiftRecon(object): stats[stat_key].append(repl_stats.get(stat_key)) last = response.get('replication_last', response.get('object_replication_last', 0)) - if last is None: - continue - if last < least_recent_time: - least_recent_time = last - least_recent_url = url - if last > most_recent_time: - most_recent_time = last - most_recent_url = url + last_stats.append((url, last)) for k in stats: if stats[k]: if k != 'replication_time': @@ -480,23 +542,7 @@ class 
SwiftRecon(object): print("[%s] - No hosts returned valid data." % k) else: print("[%s] - No hosts returned valid data." % k) - if least_recent_url is not None: - host = urlparse(least_recent_url).netloc - if not least_recent_time: - print('Oldest completion was NEVER by %s.' % host) - else: - elapsed = time.time() - least_recent_time - elapsed, elapsed_unit = seconds2timeunit(elapsed) - print('Oldest completion was %s (%d %s ago) by %s.' % ( - self._ptime(least_recent_time), - elapsed, elapsed_unit, host)) - if most_recent_url is not None: - host = urlparse(most_recent_url).netloc - elapsed = time.time() - most_recent_time - elapsed, elapsed_unit = seconds2timeunit(elapsed) - print('Most recent completion was %s (%d %s ago) by %s.' % ( - self._ptime(most_recent_time), - elapsed, elapsed_unit, host)) + self._calculate_least_and_most_recent(last_stats) print("=" * 79) def updater_check(self, hosts): @@ -1036,7 +1082,7 @@ class SwiftRecon(object): print("=" * 79) usage = ''' usage: %prog <server_type> [<server_type> [<server_type>]] - [-v] [--suppress] [-a] [-r] [-u] [-d] + [-v] [--suppress] [-a] [-r] [-u] [-d] [-R] [-l] [-T] [--md5] [--auditor] [--updater] [--expirer] [--sockstat] [--human-readable] @@ -1055,6 +1101,8 @@ class SwiftRecon(object): help="Get async stats") args.add_option('--replication', '-r', action="store_true", help="Get replication stats") + args.add_option('--reconstruction', '-R', action="store_true", + help="Get reconstruction stats") args.add_option('--auditor', action="store_true", help="Get auditor stats") args.add_option('--updater', action="store_true", @@ -1094,7 +1142,7 @@ class SwiftRecon(object): help='Also show the lowest COUNT entries in rank \ order.') args.add_option('--all', action="store_true", - help="Perform all checks. Equal to \t\t\t-arudlqT " + help="Perform all checks. 
Equal to \t\t\t-arRudlqT " "--md5 --sockstat --auditor --updater --expirer " "--driveaudit --validate-servers --swift-versions") args.add_option('--region', type="int", @@ -1152,6 +1200,7 @@ class SwiftRecon(object): self.object_auditor_check(hosts) self.updater_check(hosts) self.expirer_check(hosts) + self.reconstruction_check(hosts) elif self.server_type == 'container': self.auditor_check(hosts) self.updater_check(hosts) @@ -1209,6 +1258,13 @@ class SwiftRecon(object): print("Error: Can't check sharding on non container " "servers.") print("=" * 79) + if options.reconstruction: + if self.server_type == 'object': + self.reconstruction_check(hosts) + else: + print("Error: Can't check reconstruction stats on " + "non object servers.") + print("=" * 79) if options.validate_servers: self.server_type_check(hosts) if options.loadstats: diff --git a/swift/common/middleware/recon.py b/swift/common/middleware/recon.py index e1b5d7e57..2b5817987 100644 --- a/swift/common/middleware/recon.py +++ b/swift/common/middleware/recon.py @@ -169,6 +169,13 @@ class ReconMiddleware(object): else: return None + def get_reconstruction_info(self): + """get reconstruction info""" + reconstruction_list = ['object_reconstruction_last', + 'object_reconstruction_time'] + return self._from_recon_cache(reconstruction_list, + self.object_recon_cache) + def get_device_info(self): """get devices""" try: @@ -399,6 +406,8 @@ class ReconMiddleware(object): content = self.get_sharding_info() elif rcheck == "relinker": content = self.get_relinker_info() + elif rcheck == "reconstruction" and rtype == 'object': + content = self.get_reconstruction_info() else: content = "Invalid path: %s" % req.path return Response(request=req, status="404 Not Found", diff --git a/test/unit/cli/test_recon.py b/test/unit/cli/test_recon.py index b5290ad91..421e68aeb 100644 --- a/test/unit/cli/test_recon.py +++ b/test/unit/cli/test_recon.py @@ -648,6 +648,45 @@ aliases = %s self.assertRaises(SystemExit, recon.main) 
self.assertIn('Invalid Storage Policy', stdout.getvalue()) + def test_calculate_least_and_most_recent(self): + now = 1517894596 + + def test_least_most(data, expected): + stdout = StringIO() + with mock.patch('sys.stdout', new=stdout), \ + mock.patch('time.time', return_value=now): + self.recon_instance._calculate_least_and_most_recent(data) + self.assertEqual(stdout.getvalue(), expected) + + # first the empty set + test_least_most([], '') + expected = 'Oldest completion was NEVER by my.url.\n' + test_least_most([('http://my.url/is/awesome', 0)], expected) + + expected = ( + 'Oldest completion was 2018-02-06 05:23:11 (5 seconds ago) ' + 'by my.url.\n' + 'Most recent completion was 2018-02-06 05:23:11 (5 seconds ago) ' + 'by my.url.\n') + data = [('http://my.url/is/awesome', now - 5)] + test_least_most(data, expected) + + expected = ( + 'Oldest completion was 2018-02-06 05:06:36 (16 minutes ago) ' + 'by a.diff.url.\n' + 'Most recent completion was 2018-02-06 05:23:11 (5 seconds ago) ' + 'by my.url.\n') + data.append(('http://a.diff.url/not/as/awesome', now - 1000)) + test_least_most(data, expected) + + # now throw larger sets at it + for extra in (5, 10, 40, 100): + data.extend([ + ('http://extra.%d.url/blah' % (extra + r), + now - random.randint(6, 999)) for r in range(extra)]) + random.shuffle(data) + test_least_most(data, expected) + class TestReconCommands(unittest.TestCase): def setUp(self): @@ -1067,6 +1106,41 @@ class TestReconCommands(unittest.TestCase): cli.sharding_check([('127.0.0.1', 6011), ('127.0.0.1', 6021)]) mock_print.assert_has_calls(default_calls, any_order=True) + @ mock.patch('six.moves.builtins.print') + @ mock.patch('time.time') + def test_reconstruction_check(self, mock_now, mock_print): + now = 1430000000.0 + + def dummy_request(*args, **kwargs): + return [ + ('http://127.0.0.1:6011/recon/reconstruction', + {"object_reconstruction_last": now, + "object_reconstruction_time": 42}, + 200, 0, 0), + 
('http://127.0.0.1:6021/recon/reconstruction', + {"object_reconstruction_last": now, + "object_reconstruction_time": 23}, + 200, 0, 0)] + + cli = recon.SwiftRecon() + cli.pool.imap = dummy_request + + default_calls = [ + mock.call('[object_reconstruction_time] low: 23, high: 42, ' + 'avg: 32.5, total: 65, Failed: 0.0%, no_result: 0, ' + 'reported: 2'), + mock.call('Oldest completion was 2015-04-25 22:13:20 ' + + '(42 seconds ago) by 127.0.0.1:6011.'), + mock.call('Most recent completion was 2015-04-25 22:13:20 ' + + '(42 seconds ago) by 127.0.0.1:6011.'), + ] + + mock_now.return_value = now + 42 + cli.reconstruction_check([('127.0.0.1', 6011), ('127.0.0.1', 6021)]) + # We need any_order=True because the order of calls depends on the dict + # that is returned from the recon middleware, thus can't rely on it + mock_print.assert_has_calls(default_calls, any_order=True) + @mock.patch('six.moves.builtins.print') @mock.patch('time.time') def test_load_check(self, mock_now, mock_print): @@ -1077,16 +1151,11 @@ class TestReconCommands(unittest.TestCase): ('http://127.0.0.1:6010/recon/load', {"1m": 0.2, "5m": 0.4, "15m": 0.25, "processes": 10000, "tasks": "1/128"}, - 200, - 0, - 0), + 200, 0, 0), ('http://127.0.0.1:6020/recon/load', {"1m": 0.4, "5m": 0.8, "15m": 0.75, "processes": 9000, "tasks": "1/200"}, - 200, - 0, - 0), - ] + 200, 0, 0)] cli = recon.SwiftRecon() cli.pool.imap = dummy_request diff --git a/test/unit/common/middleware/test_recon.py b/test/unit/common/middleware/test_recon.py index 10ef95dd4..4b4656d26 100644 --- a/test/unit/common/middleware/test_recon.py +++ b/test/unit/common/middleware/test_recon.py @@ -168,6 +168,9 @@ class FakeRecon(object): def fake_relinker(self): return {"relinktest": "1"} + def fake_reconstruction(self): + return {'reconstructiontest': "1"} + def fake_updater(self, recon_type): self.fake_updater_rtype = recon_type return {'updatertest': "1"} @@ -807,6 +810,21 @@ class TestReconSuccess(TestCase): rv = 
self.app.get_replication_info('unrecognized_recon_type') self.assertIsNone(rv) + def test_get_reconstruction(self): + from_cache_response = { + "object_reconstruction_time": 0.2615511417388916, + "object_reconstruction_last": 1357969645.25} + self.fakecache.fakeout_calls = [] + self.fakecache.fakeout = from_cache_response + rv = self.app.get_reconstruction_info() + self.assertEqual(self.fakecache.fakeout_calls, + [((['object_reconstruction_last', + 'object_reconstruction_time'], + '/var/cache/swift/object.recon'), {})]) + self.assertEqual(rv, { + "object_reconstruction_time": 0.2615511417388916, + "object_reconstruction_last": 1357969645.25}) + def test_get_updater_info_container(self): from_cache_response = {"container_updater_sweep": 18.476239919662476} self.fakecache.fakeout_calls = [] @@ -1333,6 +1351,7 @@ class TestReconMiddleware(unittest.TestCase): self.app.get_async_info = self.frecon.fake_async self.app.get_device_info = self.frecon.fake_get_device_info self.app.get_replication_info = self.frecon.fake_replication + self.app.get_reconstruction_info = self.frecon.fake_reconstruction self.app.get_auditor_info = self.frecon.fake_auditor self.app.get_updater_info = self.frecon.fake_updater self.app.get_expirer_info = self.frecon.fake_expirer @@ -1380,6 +1399,13 @@ class TestReconMiddleware(unittest.TestCase): resp = self.app(req.environ, start_response) self.assertEqual(resp, get_device_resp) + def test_reconstruction_info(self): + get_reconstruction_resp = [b'{"reconstructiontest": "1"}'] + req = Request.blank('/recon/reconstruction/object', + environ={'REQUEST_METHOD': 'GET'}) + resp = self.app(req.environ, start_response) + self.assertEqual(resp, get_reconstruction_resp) + def test_recon_get_replication_notype(self): get_replication_resp = [b'{"replicationtest": "1"}'] req = Request.blank('/recon/replication', |