From 21c5f3e2e5eee162e9781f733f2eac7ebd94655f Mon Sep 17 00:00:00 2001 From: Surya Seetharaman Date: Thu, 16 Aug 2018 15:51:35 +0200 Subject: Making instance/migration listing skipping down cells configurable Presently if a cell is down, the instances in that cell are skipped from results. Sometimes this may not be desirable for operators as it may confuse the users who saw more instances in their previous listing than now. This patch adds a new api config option called list_records_by_skipping_down_cells which can be set to False (True by default) if the operator desires to just return an API error altogether if the user has any instance in the down cell instead of skipping. This is essentially a configurable revert of change I308b494ab07f6936bef94f4c9da45e9473e3534d for bug 1726301 so that operators can opt into the 500 response behaviour during listing. Change-Id: Id749761c58d4e1bc001b745d49b6ff0f3732e133 Related-Bug: #1726301 --- nova/compute/multi_cell_list.py | 8 +++++++ nova/conf/api.py | 8 +++++++ nova/tests/unit/compute/test_instance_list.py | 28 ++++++++++++++++++++++ ...ords-list-skip-down-cells-84d995e75c77c041.yaml | 16 +++++++++++++ 4 files changed, 60 insertions(+) create mode 100644 releasenotes/notes/records-list-skip-down-cells-84d995e75c77c041.yaml diff --git a/nova/compute/multi_cell_list.py b/nova/compute/multi_cell_list.py index b5c40e352e..99f29a41c1 100644 --- a/nova/compute/multi_cell_list.py +++ b/nova/compute/multi_cell_list.py @@ -19,13 +19,17 @@ import six from oslo_log import log as logging +import nova.conf from nova import context from nova import exception +from nova.i18n import _ LOG = logging.getLogger(__name__) CELL_FAIL_SENTINELS = (context.did_not_respond_sentinel, context.raised_exception_sentinel) +CONF = nova.conf.CONF + class RecordSortContext(object): def __init__(self, sort_keys, sort_dirs): @@ -400,6 +404,10 @@ class CrossCellLister(object): return if item._db_record in CELL_FAIL_SENTINELS: + if not CONF.api.list_records_by_skipping_down_cells: + raise exception.NovaException( + _('Cell %s is not responding but configuration ' + 'indicates that we should fail.') % item.cell_uuid) LOG.warning('Cell %s is not responding and hence is ' 'being omitted from the results', item.cell_uuid) diff --git a/nova/conf/api.py b/nova/conf/api.py index e542f2b25d..1c0395f43b 100644 --- a/nova/conf/api.py +++ b/nova/conf/api.py @@ -320,6 +320,14 @@ Related options: * instance_list_cells_batch_strategy * max_limit +"""), + cfg.BoolOpt("list_records_by_skipping_down_cells", + default=True, + help=""" +When set to False, this will cause the API to return a 500 error if there is an +infrastructure failure like non-responsive cells. If you want the API to skip +the down cells and return the results from the up cells set this option to +True. """), ] diff --git a/nova/tests/unit/compute/test_instance_list.py b/nova/tests/unit/compute/test_instance_list.py index c3be17dbb4..644d1e00bd 100644 --- a/nova/tests/unit/compute/test_instance_list.py +++ b/nova/tests/unit/compute/test_instance_list.py @@ -12,10 +12,12 @@ import mock from oslo_utils.fixture import uuidsentinel as uuids +import six from nova.compute import instance_list from nova.compute import multi_cell_list from nova import context as nova_context +from nova import exception from nova import objects from nova import test from nova.tests import fixtures @@ -185,6 +187,32 @@ class TestInstanceList(test.NoDBTestCase): # return the results from the up cell, ignoring the down cell. self.assertEqual(uuid_initial, uuid_final) + @mock.patch('nova.context.scatter_gather_cells') + def test_get_instances_by_not_skipping_down_cells(self, mock_sg): + self.flags(list_records_by_skipping_down_cells=False, group='api') + inst_cell0 = self.insts[uuids.cell0] + + def wrap(thing): + return multi_cell_list.RecordWrapper(ctx, self.context, thing) + + ctx = nova_context.RequestContext() + instances = [wrap(inst) for inst in inst_cell0] + + # creating one up cell and two down cells + ret_val = {} + ret_val[uuids.cell0] = instances + ret_val[uuids.cell1] = [wrap(nova_context.raised_exception_sentinel)] + ret_val[uuids.cell2] = [wrap(nova_context.did_not_respond_sentinel)] + mock_sg.return_value = ret_val + + # Raises exception if a cell is down without skipping them + # as CONF.api.list_records_by_skipping_down_cells is set to False. + # This would in turn result in an API 500 internal error. + exp = self.assertRaises(exception.NovaException, + instance_list.get_instance_objects_sorted, self.context, {}, None, + None, [], None, None) + self.assertIn('configuration indicates', six.text_type(exp)) + def test_batch_size_fixed(self): fixed_size = 200 self.flags(instance_list_cells_batch_strategy='fixed', group='api') diff --git a/releasenotes/notes/records-list-skip-down-cells-84d995e75c77c041.yaml b/releasenotes/notes/records-list-skip-down-cells-84d995e75c77c041.yaml new file mode 100644 index 0000000000..8db45ac693 --- /dev/null +++ b/releasenotes/notes/records-list-skip-down-cells-84d995e75c77c041.yaml @@ -0,0 +1,16 @@ +--- +other: + - | + In case of infrastructure failures like non-responsive cells, prior to + `change e3534d`_ we raised an API 500 error. However currently when + listing instances or migrations, we skip that cell and display results from + the up cells with the aim of increasing availability at the expense of + accuracy. If the old behaviour is desired, a new flag called + ``CONF.api.list_records_by_skipping_down_cells`` has been added which can + be set to False to mimic the old behavior. Both of these potential + behaviors will be unified in an upcoming microversion done through the + `blueprint handling-down-cell`_ where minimal constructs would be returned + for the down cell instances instead of raising 500s or skipping down cells. + + .. _change e3534d : https://review.openstack.org/#/q/I308b494ab07f6936bef94f4c9da45e9473e3534d + .. _blueprint handling-down-cell: https://blueprints.launchpad.net/nova/+spec/handling-down-cell -- cgit v1.2.1