summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBence Romsics <bence.romsics@gmail.com>2023-03-06 13:04:01 +0100
committerBence Romsics <bence.romsics@gmail.com>2023-05-02 15:40:57 +0200
commit080770cd7b0331e708d54970cdda5fb6b3bc1b20 (patch)
tree29d9c1913aa8f9a42a16d7f32160404b668838ec
parent21a30c803b754c6e55cc8203dc3ee8a2d2cfd8e6 (diff)
downloadneutron-stable/victoria.tar.gz
Suppress IPv6 metadata DAD failure and delete address (stable/victoria)
IPv4 DAD is non-existent in Linux or its failure is silent, so we never needed to catch and ignore it. On the other hand IPv6 DAD failure is explicit, hence comes this change. This of course leaves the metadata service dead on hosts where duplicate address detection failed. But if we catch the DADFailed exception and delete the address, at least other functions of the dhcp-agent should not be affected. With this the IPv6 isolated metadata service is not redundant, which is the best we can do without a redesign. Also document the promised service level of isolated metadata. Added additional tests for the metadata driver as well. Change-Id: I6b544c5528cb22e5e8846fc47dfb8b05f70f975c Partial-Bug: #1953165 (cherry picked from commit 2aee961ab6942ab59aeacdc93d918c8c19023041) (cherry picked from commit 071255f098e0e73fd5220f83cbbc8ac1c421f3ab) (cherry picked from commit 1c615281f7632f3f1cf4bd37eefe90c50c6dfe25) (cherry picked from commit defb6018f3a395094cc85a03b93a2a0b43d2f6ff) (cherry picked from commit 1d674825ebbe5fcab6c8fef7d03b5cf9b332b743) (cherry picked from commit f53cff4a9c57bb39db8baf3f4a41ade085af98b4)
-rw-r--r--doc/source/admin/config-dhcp-ha.rst32
-rw-r--r--neutron/agent/linux/dhcp.py3
-rw-r--r--neutron/agent/linux/ip_lib.py8
-rw-r--r--neutron/agent/metadata/driver.py30
-rw-r--r--neutron/common/_constants.py3
-rw-r--r--neutron/conf/agent/database/agentschedulers_db.py4
-rw-r--r--neutron/tests/unit/agent/dhcp/test_agent.py3
-rw-r--r--neutron/tests/unit/agent/linux/test_dhcp.py3
-rw-r--r--neutron/tests/unit/agent/linux/test_ip_lib.py2
-rw-r--r--neutron/tests/unit/agent/metadata/test_driver.py62
-rw-r--r--releasenotes/notes/bug-1953165-6e848ea2c0398f56.yaml16
11 files changed, 139 insertions, 27 deletions
diff --git a/doc/source/admin/config-dhcp-ha.rst b/doc/source/admin/config-dhcp-ha.rst
index 777dcc4905..4711d82cfc 100644
--- a/doc/source/admin/config-dhcp-ha.rst
+++ b/doc/source/admin/config-dhcp-ha.rst
@@ -442,6 +442,38 @@ To test the HA of DHCP agent:
#. Start DHCP agent on HostB. The VM gets the wanted IP again.
+No HA for metadata service on isolated networks
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+All Neutron backends using the DHCP agent can also provide `metadata service
+<https://docs.openstack.org/nova/latest/user/metadata.html>`_ in isolated
+networks (i.e. networks without a router). In this case the DHCP agent manages
+the metadata service (see config option `enable_isolated_metadata
+<https://docs.openstack.org/neutron/latest/configuration/dhcp-agent.html#DEFAULT.enable_isolated_metadata>`_).
+
+Note however that the metadata service is only redundant for IPv4, and not
+IPv6, even when the DHCP service is configured to be highly available
+(config option `dhcp_agents_per_network
+<https://docs.openstack.org/neutron/latest/configuration/neutron.html#DEFAULT.dhcp_agents_per_network>`_
+> 1). This is because the DHCP agent will insert a route to the well known
+metadata IPv4 address (`169.254.169.254`) via its own IP address, so it will
+be reachable as long as the DHCP service is available at that IP address.
+This also means that recovery after a failure is tied to the renewal of the
+DHCP lease, since that route will only change if the DHCP server for a VM
+changes.
+
+With IPv6, the well known metadata IPv6 address (`fe80::a9fe:a9fe`) is used,
+but directly configured in the DHCP agent network namespace.
+Due to the enforcement of duplicate address detection (DAD), this address
+can only be configured in at most one DHCP network namespace at any time.
+See `RFC 4862 <https://www.rfc-editor.org/rfc/rfc4862#section-5.4>`_ for
+details on the DAD process.
+
+For this reason, even when you have multiple DHCP agents, an arbitrary one
+(where the metadata IPv6 address is not in `dadfailed` state) will serve all
+metadata requests over IPv6. When that metadata service instance becomes
+unreachable there is no failover and the service will become unreachable.
+
Disabling and removing an agent
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/neutron/agent/linux/dhcp.py b/neutron/agent/linux/dhcp.py
index 1611f6f771..9ecd9d9d1a 100644
--- a/neutron/agent/linux/dhcp.py
+++ b/neutron/agent/linux/dhcp.py
@@ -41,6 +41,7 @@ from neutron.agent.linux import external_process
from neutron.agent.linux import ip_lib
from neutron.agent.linux import iptables_manager
from neutron.cmd import runtime_checks as checks
+from neutron.common import _constants as common_constants
from neutron.common import utils as common_utils
from neutron.ipam import utils as ipam_utils
from neutron.privileged.agent.linux import dhcp as priv_dhcp
@@ -1768,7 +1769,7 @@ class DeviceManager(object):
if self.conf.force_metadata or self.conf.enable_isolated_metadata:
ip_cidrs.append(constants.METADATA_CIDR)
if netutils.is_ipv6_enabled():
- ip_cidrs.append(constants.METADATA_V6_CIDR)
+ ip_cidrs.append(common_constants.METADATA_V6_CIDR)
self.driver.init_l3(interface_name, ip_cidrs,
namespace=network.namespace)
diff --git a/neutron/agent/linux/ip_lib.py b/neutron/agent/linux/ip_lib.py
index 11638c5b4d..2bb18bb860 100644
--- a/neutron/agent/linux/ip_lib.py
+++ b/neutron/agent/linux/ip_lib.py
@@ -101,6 +101,10 @@ class AddressNotReady(exceptions.NeutronException):
"become ready: %(reason)s")
+class DADFailed(AddressNotReady):
+ pass
+
+
InvalidArgument = privileged.InvalidArgument
@@ -593,7 +597,7 @@ class IpAddrCommand(IpDeviceCommandBase):
"""Wait until an address is no longer marked 'tentative' or 'dadfailed'
raises AddressNotReady if times out, address not present on interface
- or DAD fails
+ raises DADFailed if Duplicate Address Detection fails
"""
def is_address_ready():
try:
@@ -605,7 +609,7 @@ class IpAddrCommand(IpDeviceCommandBase):
# Since both 'dadfailed' and 'tentative' will be set if DAD fails,
# check 'dadfailed' first just to be explicit
if addr_info['dadfailed']:
- raise AddressNotReady(
+ raise DADFailed(
address=address, reason=_('Duplicate address detected'))
if addr_info['tentative']:
return False
diff --git a/neutron/agent/metadata/driver.py b/neutron/agent/metadata/driver.py
index 0a7fb5f552..aea06f8de0 100644
--- a/neutron/agent/metadata/driver.py
+++ b/neutron/agent/metadata/driver.py
@@ -33,6 +33,7 @@ from neutron.agent.l3 import namespaces
from neutron.agent.linux import external_process
from neutron.agent.linux import ip_lib
from neutron.agent.linux import utils as linux_utils
+from neutron.common import _constants as common_constants
from neutron.common import coordination
from neutron.common import utils as common_utils
@@ -266,9 +267,30 @@ class MetadataDriver(object):
# HAProxy cannot bind() until IPv6 Duplicate Address Detection
# completes. We must wait until the address leaves its 'tentative'
# state.
- ip_lib.IpAddrCommand(
- parent=ip_lib.IPDevice(name=bind_interface, namespace=ns_name)
- ).wait_until_address_ready(address=bind_address_v6)
+ try:
+ ip_lib.IpAddrCommand(
+ parent=ip_lib.IPDevice(name=bind_interface,
+ namespace=ns_name)
+ ).wait_until_address_ready(address=bind_address_v6)
+ except ip_lib.DADFailed as exc:
+ # This failure means that another DHCP agent has already
+ # configured this metadata address, so all requests will
+ # be via that single agent.
+ LOG.info('DAD failed for address %(address)s on interface '
+ '%(interface)s in namespace %(namespace)s on network '
+ '%(network)s, deleting it. Exception: %(exception)s',
+ {'address': bind_address_v6,
+ 'interface': bind_interface,
+ 'namespace': ns_name,
+ 'network': network_id,
+ 'exception': str(exc)})
+ try:
+ ip_lib.delete_ip_address(bind_address_v6, bind_interface,
+ namespace=ns_name)
+ except Exception as exc:
+ # do not re-raise a delete failure, just log
+ LOG.info('Address deletion failure: %s', str(exc))
+ return
pm.enable()
monitor.register(uuid, METADATA_SERVICE_NAME, pm)
cls.monitors[router_id] = pm
@@ -363,6 +385,6 @@ def apply_metadata_nat_rules(router, proxy):
if netutils.is_ipv6_enabled():
for c, r in proxy.metadata_nat_rules(
proxy.metadata_port,
- metadata_address=(constants.METADATA_V6_IP + '/128')):
+ metadata_address=(common_constants.METADATA_V6_CIDR)):
router.iptables_manager.ipv6['nat'].add_rule(c, r)
router.iptables_manager.apply()
diff --git a/neutron/common/_constants.py b/neutron/common/_constants.py
index e88cc5bc98..5220af80e7 100644
--- a/neutron/common/_constants.py
+++ b/neutron/common/_constants.py
@@ -81,3 +81,6 @@ AUTO_DELETE_PORT_OWNERS = [constants.DEVICE_OWNER_DHCP,
# The lowest binding index for L3 agents and DHCP agents.
LOWEST_AGENT_BINDING_INDEX = 1
+
+# Neutron-lib defines this with a /64 but it should be /128
+METADATA_V6_CIDR = constants.METADATA_V6_IP + '/128'
diff --git a/neutron/conf/agent/database/agentschedulers_db.py b/neutron/conf/agent/database/agentschedulers_db.py
index cf4f3c6cb0..e5fe956ede 100644
--- a/neutron/conf/agent/database/agentschedulers_db.py
+++ b/neutron/conf/agent/database/agentschedulers_db.py
@@ -32,7 +32,9 @@ AGENTS_SCHEDULER_OPTS = [
'network. If this number is greater than 1, the '
'scheduler automatically assigns multiple DHCP agents '
'for a given tenant network, providing high '
- 'availability for DHCP service.')),
+ 'availability for the DHCP service. However this does '
+ 'not provide high availability for the IPv6 metadata '
+ 'service in isolated networks.')),
cfg.BoolOpt('enable_services_on_agents_with_admin_state_down',
default=False,
help=_('Enable services on an agent with admin_state_up '
diff --git a/neutron/tests/unit/agent/dhcp/test_agent.py b/neutron/tests/unit/agent/dhcp/test_agent.py
index f2481a0d3b..4bb28d7692 100644
--- a/neutron/tests/unit/agent/dhcp/test_agent.py
+++ b/neutron/tests/unit/agent/dhcp/test_agent.py
@@ -37,6 +37,7 @@ from neutron.agent.linux import dhcp
from neutron.agent.linux import interface
from neutron.agent.linux import utils as linux_utils
from neutron.agent.metadata import driver as metadata_driver
+from neutron.common import _constants as common_constants
from neutron.common import config as common_config
from neutron.common import utils
from neutron.conf.agent import common as config
@@ -1924,7 +1925,7 @@ class TestDeviceManager(base.BaseTestCase):
expected_ips = ['172.9.9.9/24', const.METADATA_CIDR]
if ipv6_enabled:
- expected_ips.append(const.METADATA_V6_CIDR)
+ expected_ips.append(common_constants.METADATA_V6_CIDR)
expected = [mock.call.get_device_name(port)]
diff --git a/neutron/tests/unit/agent/linux/test_dhcp.py b/neutron/tests/unit/agent/linux/test_dhcp.py
index 1d0b1adca4..f5bbd97255 100644
--- a/neutron/tests/unit/agent/linux/test_dhcp.py
+++ b/neutron/tests/unit/agent/linux/test_dhcp.py
@@ -31,6 +31,7 @@ import testtools
from neutron.agent.linux import dhcp
from neutron.agent.linux import ip_lib
from neutron.cmd import runtime_checks as checks
+from neutron.common import _constants as common_constants
from neutron.conf.agent import common as config
from neutron.conf.agent import dhcp as dhcp_config
from neutron.conf import common as base_config
@@ -3253,7 +3254,7 @@ class TestDeviceManager(TestConfBase):
if enable_isolated_metadata or force_metadata:
expect_ips.extend([
constants.METADATA_CIDR,
- constants.METADATA_V6_CIDR])
+ common_constants.METADATA_V6_CIDR])
mgr.driver.init_l3.assert_called_with('ns-XXX',
expect_ips,
namespace='qdhcp-ns')
diff --git a/neutron/tests/unit/agent/linux/test_ip_lib.py b/neutron/tests/unit/agent/linux/test_ip_lib.py
index c7006ab741..0703f55a3e 100644
--- a/neutron/tests/unit/agent/linux/test_ip_lib.py
+++ b/neutron/tests/unit/agent/linux/test_ip_lib.py
@@ -832,7 +832,7 @@ class TestIpAddrCommand(TestIPCmdBase):
def test_wait_until_address_dadfailed(self):
self.addr_cmd.list = mock.Mock(
return_value=[{'tentative': True, 'dadfailed': True}])
- with testtools.ExpectedException(ip_lib.AddressNotReady):
+ with testtools.ExpectedException(ip_lib.DADFailed):
self.addr_cmd.wait_until_address_ready('abcd::1234')
@mock.patch.object(common_utils, 'wait_until_true')
diff --git a/neutron/tests/unit/agent/metadata/test_driver.py b/neutron/tests/unit/agent/metadata/test_driver.py
index 2081500259..f41c82a8c6 100644
--- a/neutron/tests/unit/agent/metadata/test_driver.py
+++ b/neutron/tests/unit/agent/metadata/test_driver.py
@@ -24,6 +24,7 @@ from oslo_utils import uuidutils
from neutron.agent.l3 import agent as l3_agent
from neutron.agent.l3 import router_info
+from neutron.agent.linux import ip_lib
from neutron.agent.linux import iptables_manager
from neutron.agent.linux import utils as linux_utils
from neutron.agent.metadata import driver as metadata_driver
@@ -74,6 +75,7 @@ class TestMetadataDriverProcess(base.BaseTestCase):
EUNAME = 'neutron'
EGNAME = 'neutron'
METADATA_DEFAULT_IP = '169.254.169.254'
+ METADATA_DEFAULT_IPV6 = 'fe80::a9fe:a9fe'
METADATA_PORT = 8080
METADATA_SOCKET = '/socket/path'
PIDFILE = 'pidfile'
@@ -129,7 +131,7 @@ class TestMetadataDriverProcess(base.BaseTestCase):
agent._process_updated_router(router)
f.assert_not_called()
- def test_spawn_metadata_proxy(self):
+ def _test_spawn_metadata_proxy(self, dad_failed=False):
router_id = _uuid()
router_ns = 'qrouter-%s' % router_id
ip_class_path = 'neutron.agent.linux.ip_lib.IPWrapper'
@@ -152,21 +154,31 @@ class TestMetadataDriverProcess(base.BaseTestCase):
mock.patch('os.makedirs'),\
mock.patch(
'neutron.agent.linux.ip_lib.'
- 'IpAddrCommand.wait_until_address_ready') as mock_wait:
+ 'IpAddrCommand.wait_until_address_ready') as mock_wait,\
+ mock.patch(
+ 'neutron.agent.linux.ip_lib.'
+ 'delete_ip_address') as mock_del:
+ agent.process_monitor = mock.Mock()
cfg_file = os.path.join(
metadata_driver.HaproxyConfigurator.get_config_path(
agent.conf.state_path),
"%s.conf" % router_id)
mock_open = self.useFixture(
lib_fixtures.OpenFixture(cfg_file)).mock_open
- mock_wait.return_value = True
+ if dad_failed:
+ mock_wait.side_effect = ip_lib.DADFailed(
+ address=self.METADATA_DEFAULT_IP, reason='DAD failed')
+ else:
+ mock_wait.return_value = True
agent.metadata_driver.spawn_monitored_metadata_proxy(
agent.process_monitor,
router_ns,
self.METADATA_PORT,
agent.conf,
bind_address=self.METADATA_DEFAULT_IP,
- router_id=router_id)
+ router_id=router_id,
+ bind_address_v6=self.METADATA_DEFAULT_IPV6,
+ bind_interface='fake-if')
netns_execute_args = [
'haproxy',
@@ -174,6 +186,8 @@ class TestMetadataDriverProcess(base.BaseTestCase):
log_tag = ("haproxy-" + metadata_driver.METADATA_SERVICE_NAME +
"-" + router_id)
+ bind_v6_line = 'bind %s:%s interface %s' % (
+ self.METADATA_DEFAULT_IPV6, self.METADATA_PORT, 'fake-if')
cfg_contents = metadata_driver._HAPROXY_CONFIG_TEMPLATE % {
'user': self.EUNAME,
'group': self.EGNAME,
@@ -186,18 +200,34 @@ class TestMetadataDriverProcess(base.BaseTestCase):
'pidfile': self.PIDFILE,
'log_level': 'debug',
'log_tag': log_tag,
- 'bind_v6_line': ''}
-
- mock_open.assert_has_calls([
- mock.call(cfg_file, 'w'),
- mock.call().write(cfg_contents)],
- any_order=True)
-
- ip_mock.assert_has_calls([
- mock.call(namespace=router_ns),
- mock.call().netns.execute(netns_execute_args, addl_env=None,
- run_as_root=True)
- ])
+ 'bind_v6_line': bind_v6_line}
+
+ if dad_failed:
+ agent.process_monitor.register.assert_not_called()
+ mock_del.assert_called_once_with(self.METADATA_DEFAULT_IPV6,
+ 'fake-if',
+ namespace=router_ns)
+ else:
+ mock_open.assert_has_calls([
+ mock.call(cfg_file, 'w'),
+ mock.call().write(cfg_contents)], any_order=True)
+
+ ip_mock.assert_has_calls([
+ mock.call(namespace=router_ns),
+ mock.call().netns.execute(netns_execute_args,
+ addl_env=None, run_as_root=True)
+ ])
+
+ agent.process_monitor.register.assert_called_once_with(
+ router_id, metadata_driver.METADATA_SERVICE_NAME,
+ mock.ANY)
+ mock_del.assert_not_called()
+
+ def test_spawn_metadata_proxy(self):
+ self._test_spawn_metadata_proxy()
+
+ def test_spawn_metadata_proxy_dad_failed(self):
+ self._test_spawn_metadata_proxy(dad_failed=True)
def test_create_config_file_wrong_user(self):
with mock.patch('pwd.getpwnam', side_effect=KeyError):
diff --git a/releasenotes/notes/bug-1953165-6e848ea2c0398f56.yaml b/releasenotes/notes/bug-1953165-6e848ea2c0398f56.yaml
new file mode 100644
index 0000000000..6c79c0daef
--- /dev/null
+++ b/releasenotes/notes/bug-1953165-6e848ea2c0398f56.yaml
@@ -0,0 +1,16 @@
+---
+issues:
+ - |
+ The high availability of metadata service on isolated networks is limited
+ or non-existent. IPv4 metadata is redundant when the DHCP agent managing
+ it is redundant, but recovery is tied to the renewal of the DHCP lease,
+ making most recoveries very slow. IPv6 metadata is not redundant at all
+ as the IPv6 metadata address can only be configured in a single place at
+ a time as it is link-local. Multiple agents trying to configure it will
+ generate an IPv6 duplicate address detection failure.
+
+ Administrators may observe the IPv6 metadata address in "dadfailed" state
+ in the DHCP namespace for this reason, which is only an indication it is
+ not highly available. Until a redesign is made to the isolated metadata
+ service there is no better deployment option. See `bug 1953165
+ <https://bugs.launchpad.net/neutron/+bug/1953165>`_ for more information.