summaryrefslogtreecommitdiff
path: root/nova
diff options
context:
space:
mode:
authoras0 <as3310@protonmail.com>2022-12-13 09:43:38 +0000
committeras0 <as3310@protonmail.com>2023-01-30 15:44:44 +0000
commitfba851bf3a34562db9cdb783ae539556b8b7a329 (patch)
tree67003cf5bc6d1443bd399942f57ff146ce23e474 /nova
parent9e2fdb4470603bf3c4f0d21560cced1eb748cd1b (diff)
downloadnova-fba851bf3a34562db9cdb783ae539556b8b7a329.tar.gz
Add further workaround features for qemu_monitor_announce_self
In some cases on Arista VXLAN fabrics, VMs are inaccessible via network after live migration, despite garps being observed on the fabric itself. This patch builds on the feature ``[workarounds]/enable_qemu_monitor_announce_self`` feature as reported in `bug 1815989 <https://bugs.launchpad.net/nova/+bug/1815989>` This patch adds the ability to config the number of times the QEMU announce_self monitor command is called, and add a new configuration option to specify a delay between calling the announce_self command multiple times, as in some cases, multiple announce_self monitor commands are required for the fabric to honor the garp packets and the VM to become accessible via the network after live migration. Closes-Bug: #1996995 Change-Id: I2f5bf7c9de621bb1dc7fae5b3374629a4fcc1f46
Diffstat (limited to 'nova')
-rw-r--r--nova/conf/workarounds.py22
-rw-r--r--nova/tests/unit/virt/libvirt/test_driver.py16
-rw-r--r--nova/virt/libvirt/driver.py41
3 files changed, 69 insertions, 10 deletions
diff --git a/nova/conf/workarounds.py b/nova/conf/workarounds.py
index 2ec53282cd..fd2eabbff0 100644
--- a/nova/conf/workarounds.py
+++ b/nova/conf/workarounds.py
@@ -374,6 +374,28 @@ Related options:
* :oslo.config:option:`DEFAULT.compute_driver` (libvirt)
"""),
+ cfg.IntOpt('qemu_monitor_announce_self_count',
+ default=3,
+ min=1,
+ help="""
+The total number of times to send the announce_self command to the QEMU
+monitor when enable_qemu_monitor_announce_self is enabled.
+
+Related options:
+
+* :oslo.config:option:`WORKAROUNDS.enable_qemu_monitor_announce_self` (libvirt)
+"""),
+ cfg.IntOpt('qemu_monitor_announce_self_interval',
+ default=1,
+ min=1,
+ help="""
+The number of seconds to wait before re-sending the announce_self
+command to the QEMU monitor.
+
+Related options:
+
+* :oslo.config:option:`WORKAROUNDS.enable_qemu_monitor_announce_self` (libvirt)
+"""),
cfg.BoolOpt('disable_compute_service_check_for_ffu',
default=False,
help="""
diff --git a/nova/tests/unit/virt/libvirt/test_driver.py b/nova/tests/unit/virt/libvirt/test_driver.py
index fee87d3bb5..ed5a2c386b 100644
--- a/nova/tests/unit/virt/libvirt/test_driver.py
+++ b/nova/tests/unit/virt/libvirt/test_driver.py
@@ -1822,6 +1822,22 @@ class LibvirtConnTestCase(test.NoDBTestCase,
mock_guest.set_user_password.assert_called_once_with("root", "123")
+ @mock.patch('nova.virt.libvirt.host.Host.get_guest')
+ def test_qemu_announce_self(self, mock_get_guest):
+ # Enable the workaround, configure to call announce_self 3 times
+ self.flags(enable_qemu_monitor_announce_self=True, group='workarounds')
+
+ mock_guest = mock.Mock(spec=libvirt_guest.Guest)
+ mock_get_guest.return_value = mock_guest
+
+ drvr = libvirt_driver.LibvirtDriver(fake.FakeVirtAPI(), False)
+ drvr._qemu_monitor_announce_self(mock_guest)
+
+ # Ensure that 3 calls are made as defined by option
+ # enable_qemu_monitor_announce_self_retries default of 3
+ mock_guest.announce_self.assert_any_call()
+ self.assertEqual(3, mock_guest.announce_self.call_count)
+
@mock.patch('nova.utils.get_image_from_system_metadata')
@mock.patch.object(host.Host,
'has_min_version', return_value=True)
diff --git a/nova/virt/libvirt/driver.py b/nova/virt/libvirt/driver.py
index b2f0fd0862..bd38b1f1f8 100644
--- a/nova/virt/libvirt/driver.py
+++ b/nova/virt/libvirt/driver.py
@@ -11034,16 +11034,37 @@ class LibvirtDriver(driver.ComputeDriver):
if not CONF.workarounds.enable_qemu_monitor_announce_self:
return
- LOG.info('Sending announce-self command to QEMU monitor',
- instance=instance)
-
- try:
- guest = self._host.get_guest(instance)
- guest.announce_self()
- except Exception:
- LOG.warning('Failed to send announce-self command to QEMU monitor',
- instance=instance)
- LOG.exception()
+ current_attempt = 0
+
+ max_attempts = (
+ CONF.workarounds.qemu_monitor_announce_self_count)
+ # qemu_monitor_announce_retry_interval specified in seconds
+ announce_pause = (
+ CONF.workarounds.qemu_monitor_announce_self_interval)
+
+ while(current_attempt < max_attempts):
+ # Increment attempt
+ current_attempt += 1
+
+ # Only use announce_pause after the first attempt to avoid
+ # pausing before calling announce_self for the first attempt
+ if current_attempt != 1:
+ greenthread.sleep(announce_pause)
+
+ LOG.info('Sending announce-self command to QEMU monitor. '
+ 'Attempt %(current_attempt)s of %(max_attempts)s',
+ {'current_attempt': current_attempt,
+ 'max_attempts': max_attempts}, instance=instance)
+ try:
+ guest = self._host.get_guest(instance)
+ guest.announce_self()
+ except Exception:
+ LOG.warning('Failed to send announce-self command to '
+ 'QEMU monitor. Attempt %(current_attempt)s of '
+ '%(max_attempts)s',
+ {'current_attempt': current_attempt,
+ 'max_attempts': max_attempts}, instance=instance)
+ LOG.exception()
def post_live_migration_at_destination(self, context,
instance,