summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/source/admin/config-dhcp-ha.rst32
-rw-r--r--neutron/agent/linux/dhcp.py3
-rw-r--r--neutron/agent/linux/ip_lib.py8
-rw-r--r--neutron/agent/metadata/driver.py30
-rw-r--r--neutron/common/_constants.py3
-rw-r--r--neutron/conf/agent/database/agentschedulers_db.py4
-rw-r--r--neutron/tests/unit/agent/dhcp/test_agent.py3
-rw-r--r--neutron/tests/unit/agent/linux/test_dhcp.py3
-rw-r--r--neutron/tests/unit/agent/linux/test_ip_lib.py2
-rw-r--r--neutron/tests/unit/agent/metadata/test_driver.py62
-rw-r--r--releasenotes/notes/bug-1953165-6e848ea2c0398f56.yaml16
11 files changed, 139 insertions, 27 deletions
diff --git a/doc/source/admin/config-dhcp-ha.rst b/doc/source/admin/config-dhcp-ha.rst
index 777dcc4905..4711d82cfc 100644
--- a/doc/source/admin/config-dhcp-ha.rst
+++ b/doc/source/admin/config-dhcp-ha.rst
@@ -442,6 +442,38 @@ To test the HA of DHCP agent:
#. Start DHCP agent on HostB. The VM gets the wanted IP again.
+No HA for metadata service on isolated networks
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+All Neutron backends using the DHCP agent can also provide `metadata service
+<https://docs.openstack.org/nova/latest/user/metadata.html>`_ in isolated
+networks (i.e. networks without a router). In this case the DHCP agent manages
+the metadata service (see config option `enable_isolated_metadata
+<https://docs.openstack.org/neutron/latest/configuration/dhcp-agent.html#DEFAULT.enable_isolated_metadata>`_).
+
+Note however that the metadata service is only redundant for IPv4, and not
+IPv6, even when the DHCP service is configured to be highly available
+(config option `dhcp_agents_per_network
+<https://docs.openstack.org/neutron/latest/configuration/neutron.html#DEFAULT.dhcp_agents_per_network>`_
+> 1). This is because the DHCP agent will insert a route to the well known
+metadata IPv4 address (`169.254.169.254`) via its own IP address, so it will
+be reachable as long as the DHCP service is available at that IP address.
+This also means that recovery after a failure is tied to the renewal of the
+DHCP lease, since that route will only change if the DHCP server for a VM
+changes.
+
+With IPv6, the well known metadata IPv6 address (`fe80::a9fe:a9fe`) is used,
+but directly configured in the DHCP agent network namespace.
+Due to the enforcement of duplicate address detection (DAD), this address
+can only be configured in at most one DHCP network namespace at any time.
+See `RFC 4862 <https://www.rfc-editor.org/rfc/rfc4862#section-5.4>`_ for
+details on the DAD process.
+
+For this reason, even when you have multiple DHCP agents, an arbitrary one
+(where the metadata IPv6 address is not in `dadfailed` state) will serve all
+metadata requests over IPv6. When that metadata service instance becomes
+unreachable there is no failover and the service will become unreachable.
+
Disabling and removing an agent
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/neutron/agent/linux/dhcp.py b/neutron/agent/linux/dhcp.py
index 4eecf315d3..efa8fa8296 100644
--- a/neutron/agent/linux/dhcp.py
+++ b/neutron/agent/linux/dhcp.py
@@ -40,6 +40,7 @@ from neutron.agent.linux import external_process
from neutron.agent.linux import ip_lib
from neutron.agent.linux import iptables_manager
from neutron.cmd import runtime_checks as checks
+from neutron.common import _constants as common_constants
from neutron.common import utils as common_utils
from neutron.ipam import utils as ipam_utils
from neutron.privileged.agent.linux import dhcp as priv_dhcp
@@ -1765,7 +1766,7 @@ class DeviceManager(object):
if self.conf.force_metadata or self.conf.enable_isolated_metadata:
ip_cidrs.append(constants.METADATA_CIDR)
if netutils.is_ipv6_enabled():
- ip_cidrs.append(constants.METADATA_V6_CIDR)
+ ip_cidrs.append(common_constants.METADATA_V6_CIDR)
self.driver.init_l3(interface_name, ip_cidrs,
namespace=network.namespace)
diff --git a/neutron/agent/linux/ip_lib.py b/neutron/agent/linux/ip_lib.py
index fadad58d8f..ff121580fe 100644
--- a/neutron/agent/linux/ip_lib.py
+++ b/neutron/agent/linux/ip_lib.py
@@ -103,6 +103,10 @@ class AddressNotReady(exceptions.NeutronException):
"become ready: %(reason)s")
+class DADFailed(AddressNotReady):
+ pass
+
+
InvalidArgument = privileged.InvalidArgument
@@ -581,7 +585,7 @@ class IpAddrCommand(IpDeviceCommandBase):
"""Wait until an address is no longer marked 'tentative' or 'dadfailed'
raises AddressNotReady if times out, address not present on interface
- or DAD fails
+ raises DADFailed if Duplicate Address Detection fails
"""
def is_address_ready():
try:
@@ -593,7 +597,7 @@ class IpAddrCommand(IpDeviceCommandBase):
# Since both 'dadfailed' and 'tentative' will be set if DAD fails,
# check 'dadfailed' first just to be explicit
if addr_info['dadfailed']:
- raise AddressNotReady(
+ raise DADFailed(
address=address, reason=_('Duplicate address detected'))
if addr_info['tentative']:
return False
diff --git a/neutron/agent/metadata/driver.py b/neutron/agent/metadata/driver.py
index 0a7fb5f552..aea06f8de0 100644
--- a/neutron/agent/metadata/driver.py
+++ b/neutron/agent/metadata/driver.py
@@ -33,6 +33,7 @@ from neutron.agent.l3 import namespaces
from neutron.agent.linux import external_process
from neutron.agent.linux import ip_lib
from neutron.agent.linux import utils as linux_utils
+from neutron.common import _constants as common_constants
from neutron.common import coordination
from neutron.common import utils as common_utils
@@ -266,9 +267,30 @@ class MetadataDriver(object):
# HAProxy cannot bind() until IPv6 Duplicate Address Detection
# completes. We must wait until the address leaves its 'tentative'
# state.
- ip_lib.IpAddrCommand(
- parent=ip_lib.IPDevice(name=bind_interface, namespace=ns_name)
- ).wait_until_address_ready(address=bind_address_v6)
+ try:
+ ip_lib.IpAddrCommand(
+ parent=ip_lib.IPDevice(name=bind_interface,
+ namespace=ns_name)
+ ).wait_until_address_ready(address=bind_address_v6)
+ except ip_lib.DADFailed as exc:
+ # This failure means that another DHCP agent has already
+ # configured this metadata address, so all requests will
+ # be via that single agent.
+ LOG.info('DAD failed for address %(address)s on interface '
+ '%(interface)s in namespace %(namespace)s on network '
+ '%(network)s, deleting it. Exception: %(exception)s',
+ {'address': bind_address_v6,
+ 'interface': bind_interface,
+ 'namespace': ns_name,
+ 'network': network_id,
+ 'exception': str(exc)})
+ try:
+ ip_lib.delete_ip_address(bind_address_v6, bind_interface,
+ namespace=ns_name)
+ except Exception as exc:
+ # do not re-raise a delete failure, just log
+ LOG.info('Address deletion failure: %s', str(exc))
+ return
pm.enable()
monitor.register(uuid, METADATA_SERVICE_NAME, pm)
cls.monitors[router_id] = pm
@@ -363,6 +385,6 @@ def apply_metadata_nat_rules(router, proxy):
if netutils.is_ipv6_enabled():
for c, r in proxy.metadata_nat_rules(
proxy.metadata_port,
- metadata_address=(constants.METADATA_V6_IP + '/128')):
+ metadata_address=(common_constants.METADATA_V6_CIDR)):
router.iptables_manager.ipv6['nat'].add_rule(c, r)
router.iptables_manager.apply()
diff --git a/neutron/common/_constants.py b/neutron/common/_constants.py
index e88cc5bc98..5220af80e7 100644
--- a/neutron/common/_constants.py
+++ b/neutron/common/_constants.py
@@ -81,3 +81,6 @@ AUTO_DELETE_PORT_OWNERS = [constants.DEVICE_OWNER_DHCP,
# The lowest binding index for L3 agents and DHCP agents.
LOWEST_AGENT_BINDING_INDEX = 1
+
+# Neutron-lib defines this with a /64 but it should be /128
+METADATA_V6_CIDR = constants.METADATA_V6_IP + '/128'
diff --git a/neutron/conf/agent/database/agentschedulers_db.py b/neutron/conf/agent/database/agentschedulers_db.py
index cf4f3c6cb0..e5fe956ede 100644
--- a/neutron/conf/agent/database/agentschedulers_db.py
+++ b/neutron/conf/agent/database/agentschedulers_db.py
@@ -32,7 +32,9 @@ AGENTS_SCHEDULER_OPTS = [
'network. If this number is greater than 1, the '
'scheduler automatically assigns multiple DHCP agents '
'for a given tenant network, providing high '
- 'availability for DHCP service.')),
+ 'availability for the DHCP service. However this does '
+ 'not provide high availability for the IPv6 metadata '
+ 'service in isolated networks.')),
cfg.BoolOpt('enable_services_on_agents_with_admin_state_down',
default=False,
help=_('Enable services on an agent with admin_state_up '
diff --git a/neutron/tests/unit/agent/dhcp/test_agent.py b/neutron/tests/unit/agent/dhcp/test_agent.py
index fa1533271c..3e137ab8db 100644
--- a/neutron/tests/unit/agent/dhcp/test_agent.py
+++ b/neutron/tests/unit/agent/dhcp/test_agent.py
@@ -37,6 +37,7 @@ from neutron.agent.linux import dhcp
from neutron.agent.linux import interface
from neutron.agent.linux import utils as linux_utils
from neutron.agent.metadata import driver as metadata_driver
+from neutron.common import _constants as common_constants
from neutron.common import config as common_config
from neutron.common import utils
from neutron.conf.agent import common as config
@@ -1929,7 +1930,7 @@ class TestDeviceManager(base.BaseTestCase):
expected_ips = ['172.9.9.9/24', const.METADATA_CIDR]
if ipv6_enabled:
- expected_ips.append(const.METADATA_V6_CIDR)
+ expected_ips.append(common_constants.METADATA_V6_CIDR)
expected = [mock.call.get_device_name(port)]
diff --git a/neutron/tests/unit/agent/linux/test_dhcp.py b/neutron/tests/unit/agent/linux/test_dhcp.py
index b88fe2d318..67e58750dd 100644
--- a/neutron/tests/unit/agent/linux/test_dhcp.py
+++ b/neutron/tests/unit/agent/linux/test_dhcp.py
@@ -31,6 +31,7 @@ import testtools
from neutron.agent.linux import dhcp
from neutron.agent.linux import ip_lib
from neutron.cmd import runtime_checks as checks
+from neutron.common import _constants as common_constants
from neutron.conf.agent import common as config
from neutron.conf.agent import dhcp as dhcp_config
from neutron.conf import common as base_config
@@ -3254,7 +3255,7 @@ class TestDeviceManager(TestConfBase):
if enable_isolated_metadata or force_metadata:
expect_ips.extend([
constants.METADATA_CIDR,
- constants.METADATA_V6_CIDR])
+ common_constants.METADATA_V6_CIDR])
mgr.driver.init_l3.assert_called_with('ns-XXX',
expect_ips,
namespace='qdhcp-ns')
diff --git a/neutron/tests/unit/agent/linux/test_ip_lib.py b/neutron/tests/unit/agent/linux/test_ip_lib.py
index 3ac578093b..393a828529 100644
--- a/neutron/tests/unit/agent/linux/test_ip_lib.py
+++ b/neutron/tests/unit/agent/linux/test_ip_lib.py
@@ -792,7 +792,7 @@ class TestIpAddrCommand(TestIPCmdBase):
def test_wait_until_address_dadfailed(self):
self.addr_cmd.list = mock.Mock(
return_value=[{'tentative': True, 'dadfailed': True}])
- with testtools.ExpectedException(ip_lib.AddressNotReady):
+ with testtools.ExpectedException(ip_lib.DADFailed):
self.addr_cmd.wait_until_address_ready('abcd::1234')
@mock.patch.object(common_utils, 'wait_until_true')
diff --git a/neutron/tests/unit/agent/metadata/test_driver.py b/neutron/tests/unit/agent/metadata/test_driver.py
index 2081500259..f41c82a8c6 100644
--- a/neutron/tests/unit/agent/metadata/test_driver.py
+++ b/neutron/tests/unit/agent/metadata/test_driver.py
@@ -24,6 +24,7 @@ from oslo_utils import uuidutils
from neutron.agent.l3 import agent as l3_agent
from neutron.agent.l3 import router_info
+from neutron.agent.linux import ip_lib
from neutron.agent.linux import iptables_manager
from neutron.agent.linux import utils as linux_utils
from neutron.agent.metadata import driver as metadata_driver
@@ -74,6 +75,7 @@ class TestMetadataDriverProcess(base.BaseTestCase):
EUNAME = 'neutron'
EGNAME = 'neutron'
METADATA_DEFAULT_IP = '169.254.169.254'
+ METADATA_DEFAULT_IPV6 = 'fe80::a9fe:a9fe'
METADATA_PORT = 8080
METADATA_SOCKET = '/socket/path'
PIDFILE = 'pidfile'
@@ -129,7 +131,7 @@ class TestMetadataDriverProcess(base.BaseTestCase):
agent._process_updated_router(router)
f.assert_not_called()
- def test_spawn_metadata_proxy(self):
+ def _test_spawn_metadata_proxy(self, dad_failed=False):
router_id = _uuid()
router_ns = 'qrouter-%s' % router_id
ip_class_path = 'neutron.agent.linux.ip_lib.IPWrapper'
@@ -152,21 +154,31 @@ class TestMetadataDriverProcess(base.BaseTestCase):
mock.patch('os.makedirs'),\
mock.patch(
'neutron.agent.linux.ip_lib.'
- 'IpAddrCommand.wait_until_address_ready') as mock_wait:
+ 'IpAddrCommand.wait_until_address_ready') as mock_wait,\
+ mock.patch(
+ 'neutron.agent.linux.ip_lib.'
+ 'delete_ip_address') as mock_del:
+ agent.process_monitor = mock.Mock()
cfg_file = os.path.join(
metadata_driver.HaproxyConfigurator.get_config_path(
agent.conf.state_path),
"%s.conf" % router_id)
mock_open = self.useFixture(
lib_fixtures.OpenFixture(cfg_file)).mock_open
- mock_wait.return_value = True
+ if dad_failed:
+ mock_wait.side_effect = ip_lib.DADFailed(
+ address=self.METADATA_DEFAULT_IP, reason='DAD failed')
+ else:
+ mock_wait.return_value = True
agent.metadata_driver.spawn_monitored_metadata_proxy(
agent.process_monitor,
router_ns,
self.METADATA_PORT,
agent.conf,
bind_address=self.METADATA_DEFAULT_IP,
- router_id=router_id)
+ router_id=router_id,
+ bind_address_v6=self.METADATA_DEFAULT_IPV6,
+ bind_interface='fake-if')
netns_execute_args = [
'haproxy',
@@ -174,6 +186,8 @@ class TestMetadataDriverProcess(base.BaseTestCase):
log_tag = ("haproxy-" + metadata_driver.METADATA_SERVICE_NAME +
"-" + router_id)
+ bind_v6_line = 'bind %s:%s interface %s' % (
+ self.METADATA_DEFAULT_IPV6, self.METADATA_PORT, 'fake-if')
cfg_contents = metadata_driver._HAPROXY_CONFIG_TEMPLATE % {
'user': self.EUNAME,
'group': self.EGNAME,
@@ -186,18 +200,34 @@ class TestMetadataDriverProcess(base.BaseTestCase):
'pidfile': self.PIDFILE,
'log_level': 'debug',
'log_tag': log_tag,
- 'bind_v6_line': ''}
-
- mock_open.assert_has_calls([
- mock.call(cfg_file, 'w'),
- mock.call().write(cfg_contents)],
- any_order=True)
-
- ip_mock.assert_has_calls([
- mock.call(namespace=router_ns),
- mock.call().netns.execute(netns_execute_args, addl_env=None,
- run_as_root=True)
- ])
+ 'bind_v6_line': bind_v6_line}
+
+ if dad_failed:
+ agent.process_monitor.register.assert_not_called()
+ mock_del.assert_called_once_with(self.METADATA_DEFAULT_IPV6,
+ 'fake-if',
+ namespace=router_ns)
+ else:
+ mock_open.assert_has_calls([
+ mock.call(cfg_file, 'w'),
+ mock.call().write(cfg_contents)], any_order=True)
+
+ ip_mock.assert_has_calls([
+ mock.call(namespace=router_ns),
+ mock.call().netns.execute(netns_execute_args,
+ addl_env=None, run_as_root=True)
+ ])
+
+ agent.process_monitor.register.assert_called_once_with(
+ router_id, metadata_driver.METADATA_SERVICE_NAME,
+ mock.ANY)
+ mock_del.assert_not_called()
+
+ def test_spawn_metadata_proxy(self):
+ self._test_spawn_metadata_proxy()
+
+ def test_spawn_metadata_proxy_dad_failed(self):
+ self._test_spawn_metadata_proxy(dad_failed=True)
def test_create_config_file_wrong_user(self):
with mock.patch('pwd.getpwnam', side_effect=KeyError):
diff --git a/releasenotes/notes/bug-1953165-6e848ea2c0398f56.yaml b/releasenotes/notes/bug-1953165-6e848ea2c0398f56.yaml
new file mode 100644
index 0000000000..6c79c0daef
--- /dev/null
+++ b/releasenotes/notes/bug-1953165-6e848ea2c0398f56.yaml
@@ -0,0 +1,16 @@
+---
+issues:
+ - |
+ The high availability of metadata service on isolated networks is limited
+ or non-existent. IPv4 metadata is redundant when the DHCP agent managing
+ it is redundant, but recovery is tied to the renewal of the DHCP lease,
+ making most recoveries very slow. IPv6 metadata is not redundant at all:
+ because the IPv6 metadata address is link-local, it can only be configured
+ in a single place at a time. Multiple agents trying to configure it will
+ generate an IPv6 duplicate address detection failure.
+
+ Administrators may observe the IPv6 metadata address in "dadfailed" state
+ in the DHCP namespace for this reason, which is only an indication it is
+ not highly available. Until a redesign is made to the isolated metadata
+ service there is not a better deployment option. See `bug 1953165
+ <https://bugs.launchpad.net/neutron/+bug/1953165>`_ for more information.