diff options
-rw-r--r-- | nova/conf/workarounds.py | 22 | ||||
-rw-r--r-- | nova/tests/unit/virt/libvirt/test_driver.py | 16 | ||||
-rw-r--r-- | nova/virt/libvirt/driver.py | 41 | ||||
-rw-r--r-- | releasenotes/notes/bug-1996995-qemu_monitor_announce_self-add-configurables-2b2f19d238442f72.yaml | 28 |
4 files changed, 97 insertions, 10 deletions
diff --git a/nova/conf/workarounds.py b/nova/conf/workarounds.py index 2ec53282cd..fd2eabbff0 100644 --- a/nova/conf/workarounds.py +++ b/nova/conf/workarounds.py @@ -374,6 +374,28 @@ Related options: * :oslo.config:option:`DEFAULT.compute_driver` (libvirt) """), + cfg.IntOpt('qemu_monitor_announce_self_count', + default=3, + min=1, + help=""" +The total number of times to send the announce_self command to the QEMU +monitor when enable_qemu_monitor_announce_self is enabled. + +Related options: + +* :oslo.config:option:`WORKAROUNDS.enable_qemu_monitor_announce_self` (libvirt) +"""), + cfg.IntOpt('qemu_monitor_announce_self_interval', + default=1, + min=1, + help=""" +The number of seconds to wait before re-sending the announce_self +command to the QEMU monitor. + +Related options: + +* :oslo.config:option:`WORKAROUNDS.enable_qemu_monitor_announce_self` (libvirt) +"""), cfg.BoolOpt('disable_compute_service_check_for_ffu', default=False, help=""" diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py index fee87d3bb5..ed5a2c386b 100644 --- a/nova/tests/unit/virt/libvirt/test_driver.py +++ b/nova/tests/unit/virt/libvirt/test_driver.py @@ -1822,6 +1822,22 @@ class LibvirtConnTestCase(test.NoDBTestCase, mock_guest.set_user_password.assert_called_once_with("root", "123") + @mock.patch('nova.virt.libvirt.host.Host.get_guest') + def test_qemu_announce_self(self, mock_get_guest): + # Enable the workaround, configure to call announce_self 3 times + self.flags(enable_qemu_monitor_announce_self=True, group='workarounds') + + mock_guest = mock.Mock(spec=libvirt_guest.Guest) + mock_get_guest.return_value = mock_guest + + drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False) + drvr._qemu_monitor_announce_self(mock_guest) + + # Ensure that 3 calls are made as defined by option + # qemu_monitor_announce_self_count default of 3 + mock_guest.announce_self.assert_any_call() + self.assertEqual(3, 
mock_guest.announce_self.call_count) + @mock.patch('nova.utils.get_image_from_system_metadata') @mock.patch.object(host.Host, 'has_min_version', return_value=True) diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py index b2f0fd0862..bd38b1f1f8 100644 --- a/nova/virt/libvirt/driver.py +++ b/nova/virt/libvirt/driver.py @@ -11034,16 +11034,37 @@ class LibvirtDriver(driver.ComputeDriver): if not CONF.workarounds.enable_qemu_monitor_announce_self: return - LOG.info('Sending announce-self command to QEMU monitor', - instance=instance) - - try: - guest = self._host.get_guest(instance) - guest.announce_self() - except Exception: - LOG.warning('Failed to send announce-self command to QEMU monitor', - instance=instance) - LOG.exception() + current_attempt = 0 + + max_attempts = ( + CONF.workarounds.qemu_monitor_announce_self_count) + # qemu_monitor_announce_self_interval is specified in seconds + announce_pause = ( + CONF.workarounds.qemu_monitor_announce_self_interval) + + while(current_attempt < max_attempts): + # Increment attempt + current_attempt += 1 + + # Only use announce_pause after the first attempt to avoid + # pausing before calling announce_self for the first attempt + if current_attempt != 1: + greenthread.sleep(announce_pause) + + LOG.info('Sending announce-self command to QEMU monitor. ' + 'Attempt %(current_attempt)s of %(max_attempts)s', + {'current_attempt': current_attempt, + 'max_attempts': max_attempts}, instance=instance) + try: + guest = self._host.get_guest(instance) + guest.announce_self() + except Exception: + LOG.warning('Failed to send announce-self command to ' + 'QEMU monitor. 
Attempt %(current_attempt)s of ' + '%(max_attempts)s', + {'current_attempt': current_attempt, + 'max_attempts': max_attempts}, instance=instance) + LOG.exception() def post_live_migration_at_destination(self, context, instance, diff --git a/releasenotes/notes/bug-1996995-qemu_monitor_announce_self-add-configurables-2b2f19d238442f72.yaml b/releasenotes/notes/bug-1996995-qemu_monitor_announce_self-add-configurables-2b2f19d238442f72.yaml new file mode 100644 index 0000000000..0941dd7450 --- /dev/null +++ b/releasenotes/notes/bug-1996995-qemu_monitor_announce_self-add-configurables-2b2f19d238442f72.yaml @@ -0,0 +1,28 @@ +--- +fixes: + - | + Fixes `bug 1996995`_ in which VMs live migrated on certain VXLAN Arista + network fabrics were inaccessible until the switch ARP cache expired. + + A Nova workaround option of ``enable_qemu_monitor_announce_self`` was added + to fix `bug 1815989`_ which when enabled would interact with the QEMU + monitor and force a VM to announce itself. + + On certain network fabrics, VMs that are live migrated remain inaccessible + via the network despite the QEMU monitor announce_self command successfully + being called. + + It was noted that on Arista VXLAN fabrics, testing showed that it required + several attempts of running the QEMU announce_self monitor command before + the switch would acknowledge a VM's new location on the fabric. + + This fix introduces two operator configurable options. + The first option sets the number of times the QEMU monitor announce_self + command is called - ``qemu_monitor_announce_self_count`` + + The second option allows operators to set the delay between the QEMU + announce_self commands in seconds for subsequent announce_self commands + with ``qemu_monitor_announce_self_interval`` + + .. _`bug 1996995`: https://bugs.launchpad.net/nova/+bug/1996995 + .. _`bug 1815989`: https://bugs.launchpad.net/nova/+bug/1815989 |