summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--doc/source/install/enrollment.rst26
-rw-r--r--ironic/common/image_service.py35
-rw-r--r--ironic/common/rpc_service.py18
-rw-r--r--ironic/conductor/base_manager.py17
-rw-r--r--ironic/conductor/utils.py5
-rw-r--r--ironic/conf/conductor.py8
-rw-r--r--ironic/db/sqlalchemy/api.py6
-rw-r--r--ironic/tests/base.py15
-rw-r--r--ironic/tests/unit/common/test_image_service.py107
-rw-r--r--ironic/tests/unit/common/test_rpc_service.py50
-rw-r--r--ironic/tests/unit/conductor/test_cleaning.py30
-rw-r--r--ironic/tests/unit/conductor/test_manager.py3
-rw-r--r--ironic/tests/unit/conductor/test_utils.py11
-rw-r--r--playbooks/metal3-ci/run.yaml2
-rw-r--r--releasenotes/notes/Cleanfail-power-off-13b5fdcc2727866a.yaml8
-rw-r--r--releasenotes/notes/cross-link-1ffd1a4958f14fd7.yaml5
-rw-r--r--releasenotes/notes/graceful_shutdown_wait-9a62627714b86726.yaml15
-rwxr-xr-xtools/test-setup.sh2
-rw-r--r--tox.ini2
19 files changed, 214 insertions, 151 deletions
diff --git a/doc/source/install/enrollment.rst b/doc/source/install/enrollment.rst
index 9f9355d75..40c63b4bb 100644
--- a/doc/source/install/enrollment.rst
+++ b/doc/source/install/enrollment.rst
@@ -81,15 +81,9 @@ affected, since the initial provision state is still ``available``.
However, using API version 1.11 or above may break existing automation tooling
with respect to node creation.
-The default API version used by (the most recent) python-ironicclient is 1.9,
-but it may change in the future and should not be relied on.
-
-In the examples below we will use version 1.11 of the Bare metal API.
-This gives us the following advantages:
-
-* Explicit power credentials validation before leaving the ``enroll`` state.
-* Running node cleaning before entering the ``available`` state.
-* Not exposing half-configured nodes to the scheduler.
+The ``openstack baremetal`` command line tool tries to negotiate the most
+recent supported version, which in virtually all cases will be newer than
+1.11.
To set the API version for all commands, you can set the environment variable
``IRONIC_API_VERSION``. For the OpenStackClient baremetal plugin, set
@@ -118,7 +112,6 @@ and may be combined if desired.
.. code-block:: console
- $ export OS_BAREMETAL_API_VERSION=1.11
$ baremetal node create --driver ipmi
+--------------+--------------------------------------+
| Property | Value |
@@ -423,12 +416,13 @@ Validating node information
Making node available for deployment
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-In order for nodes to be available for deploying workloads on them, nodes
-must be in the ``available`` provision state. To do this, nodes
-created with API version 1.11 and above must be moved from the ``enroll`` state
-to the ``manageable`` state and then to the ``available`` state.
-This section can be safely skipped, if API version 1.10 or earlier is used
-(which is the case by default).
+In order for nodes to be available for deploying workloads on them, nodes must
+be in the ``available`` provision state. To do this, nodes must be moved from
+the ``enroll`` state to the ``manageable`` state and then to the ``available``
+state.
+
+.. note::
+ This section can be skipped, if API version 1.10 or earlier is used.
After creating a node and before moving it from its initial provision state of
``enroll``, basic power and port information needs to be configured on the node.
diff --git a/ironic/common/image_service.py b/ironic/common/image_service.py
index 5280ee6bc..4b95b5152 100644
--- a/ironic/common/image_service.py
+++ b/ironic/common/image_service.py
@@ -34,10 +34,6 @@ from ironic.common import utils
from ironic.conf import CONF
IMAGE_CHUNK_SIZE = 1024 * 1024 # 1mb
-# NOTE(kaifeng) Image will be truncated to 2GiB by sendfile,
-# we use a large chunk size here for a better performance
-# while keep the chunk size less than the size limit.
-SENDFILE_CHUNK_SIZE = 1024 * 1024 * 1024 # 1Gb
LOG = log.getLogger(__name__)
@@ -264,26 +260,23 @@ class FileImageService(BaseImageService):
"""
source_image_path = self.validate_href(image_href)
dest_image_path = image_file.name
- local_device = os.stat(dest_image_path).st_dev
try:
- # We should have read and write access to source file to create
- # hard link to it.
- if (local_device == os.stat(source_image_path).st_dev
- and os.access(source_image_path, os.R_OK | os.W_OK)):
- image_file.close()
- os.remove(dest_image_path)
+ image_file.close()
+ os.remove(dest_image_path)
+
+ try:
os.link(source_image_path, dest_image_path)
+ except OSError as exc:
+ LOG.debug('Could not create a link from %(src)s to %(dest)s, '
+ 'will copy the content instead. Error: %(exc)s.',
+ {'src': source_image_path, 'dest': dest_image_path,
+ 'exc': exc})
else:
- filesize = os.path.getsize(source_image_path)
- offset = 0
- with open(source_image_path, 'rb') as input_img:
- while offset < filesize:
- count = min(SENDFILE_CHUNK_SIZE, filesize - offset)
- nbytes_out = os.sendfile(image_file.fileno(),
- input_img.fileno(),
- offset,
- count)
- offset += nbytes_out
+ return
+
+ # NOTE(dtantsur): starting with Python 3.8, copyfile() uses
+ # efficient copying (i.e. sendfile) under the hood.
+ shutil.copyfile(source_image_path, dest_image_path)
except Exception as e:
raise exception.ImageDownloadFailed(image_href=image_href,
reason=str(e))
diff --git a/ironic/common/rpc_service.py b/ironic/common/rpc_service.py
index cb0f23c98..a74f6bab3 100644
--- a/ironic/common/rpc_service.py
+++ b/ironic/common/rpc_service.py
@@ -100,7 +100,8 @@ class RPCService(service.Service):
seconds=CONF.hash_ring_reset_interval)
try:
- self.manager.del_host(deregister=self.deregister)
+ self.manager.del_host(deregister=self.deregister,
+ clear_node_reservations=False)
except Exception as e:
LOG.exception('Service error occurred when cleaning up '
'the RPC manager. Error: %s', e)
@@ -127,6 +128,21 @@ class RPCService(service.Service):
LOG.info('Stopped RPC server for service %(service)s on host '
'%(host)s.',
{'service': self.topic, 'host': self.host})
+
+ # Wait for reservation locks held by this conductor.
+ # The conductor process will end when:
+ # - All reservations for this conductor are released
+ # - CONF.graceful_shutdown_timeout has elapsed
+ # - The process manager (systemd, kubernetes) sends SIGKILL after the
+ # configured graceful period
+ graceful_time = initial_time + datetime.timedelta(
+ seconds=CONF.graceful_shutdown_timeout)
+ while (self.manager.has_reserved()
+ and graceful_time > timeutils.utcnow()):
+ LOG.info('Waiting for reserved nodes to clear on host %(host)s',
+ {'host': self.host})
+ time.sleep(1)
+
rpc.set_global_manager(None)
def _handle_signal(self, signo, frame):
diff --git a/ironic/conductor/base_manager.py b/ironic/conductor/base_manager.py
index 5c2e4ea95..544411e1d 100644
--- a/ironic/conductor/base_manager.py
+++ b/ironic/conductor/base_manager.py
@@ -298,15 +298,17 @@ class BaseConductorManager(object):
# This is only used in tests currently. Delete it?
self._periodic_task_callables = periodic_task_callables
- def del_host(self, deregister=True):
+ def del_host(self, deregister=True, clear_node_reservations=True):
# Conductor deregistration fails if called on non-initialized
# conductor (e.g. when rpc server is unreachable).
if not hasattr(self, 'conductor'):
return
self._shutdown = True
self._keepalive_evt.set()
- # clear all locks held by this conductor before deregistering
- self.dbapi.clear_node_reservations_for_conductor(self.host)
+
+ if clear_node_reservations:
+ # clear all locks held by this conductor before deregistering
+ self.dbapi.clear_node_reservations_for_conductor(self.host)
if deregister:
try:
# Inform the cluster that this conductor is shutting down.
@@ -338,6 +340,15 @@ class BaseConductorManager(object):
"""Return a count of currently online conductors"""
return len(self.dbapi.get_online_conductors())
+ def has_reserved(self):
+ """Determines if this host currently has any reserved nodes
+
+ :returns: True if this host has reserved nodes
+ """
+ return bool(self.dbapi.get_nodeinfo_list(
+ filters={'reserved_by_any_of': [self.host]},
+ limit=1))
+
def _register_and_validate_hardware_interfaces(self, hardware_types):
"""Register and validate hardware interfaces for this conductor.
diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py
index 2272c0df7..add9ee74d 100644
--- a/ironic/conductor/utils.py
+++ b/ironic/conductor/utils.py
@@ -499,6 +499,11 @@ def cleaning_error_handler(task, logmsg, errmsg=None, traceback=False,
# NOTE(dtantsur): avoid overwriting existing maintenance_reason
if not node.maintenance_reason and set_maintenance:
node.maintenance_reason = errmsg
+
+ if CONF.conductor.poweroff_in_cleanfail:
+ # NOTE(NobodyCam): Power off node in clean fail
+ node_power_action(task, states.POWER_OFF)
+
node.save()
if set_fail_state and node.provision_state != states.CLEANFAIL:
diff --git a/ironic/conf/conductor.py b/ironic/conf/conductor.py
index 653e30f56..2452fafe7 100644
--- a/ironic/conf/conductor.py
+++ b/ironic/conf/conductor.py
@@ -349,6 +349,14 @@ opts = [
'is a global setting applying to all requests this '
'conductor receives, regardless of access rights. '
'The concurrent clean limit cannot be disabled.')),
+
+ cfg.BoolOpt('poweroff_in_cleanfail',
+ default=False,
+ help=_('If True power off nodes in the ``clean failed`` '
+ 'state. Default False. Option may be unsafe '
+ 'when using Cleaning to perform '
+ 'hardware-transformative actions such as '
+ 'firmware upgrade.')),
]
diff --git a/ironic/db/sqlalchemy/api.py b/ironic/db/sqlalchemy/api.py
index 93a211fc3..31ec9647e 100644
--- a/ironic/db/sqlalchemy/api.py
+++ b/ironic/db/sqlalchemy/api.py
@@ -1379,12 +1379,14 @@ class Connection(api.Connection):
def list_hardware_type_interfaces(self, hardware_types):
with _session_for_read() as session:
- query = (session.query(models.ConductorHardwareInterfaces)
+ query = (session.query(models.ConductorHardwareInterfaces,
+ models.Conductor)
+ .join(models.Conductor)
.filter(models.ConductorHardwareInterfaces.hardware_type
.in_(hardware_types)))
query = _filter_active_conductors(query)
- return query.all()
+ return [row[0] for row in query]
@oslo_db_api.retry_on_deadlock
def register_conductor_hardware_interfaces(self, conductor_id, interfaces):
diff --git a/ironic/tests/base.py b/ironic/tests/base.py
index 348f15c20..69e449d3b 100644
--- a/ironic/tests/base.py
+++ b/ironic/tests/base.py
@@ -102,6 +102,11 @@ class WarningsFixture(fixtures.Fixture):
category=UserWarning,
)
+ # NOTE(gibi): The UUIDFields emits a warning if the value is not a
+ # valid UUID. Let's escalate that to an exception in the test to
+ # prevent adding violations.
+ warnings.filterwarnings('error', message='.* is an invalid UUID.')
+
# Enable deprecation warnings to capture upcoming SQLAlchemy changes
warnings.filterwarnings(
@@ -125,16 +130,6 @@ class WarningsFixture(fixtures.Fixture):
category=sqla_exc.SAWarning,
)
- # ...but filter everything out until we get around to fixing them
- # TODO(stephenfin): Fix all of these
-
- warnings.filterwarnings(
- 'ignore',
- module='ironic',
- message='SELECT statement has a cartesian product ',
- category=sqla_exc.SAWarning,
- )
-
# FIXME(stephenfin): We can remove this once oslo.db is fixed
# https://review.opendev.org/c/openstack/oslo.db/+/856453
warnings.filterwarnings(
diff --git a/ironic/tests/unit/common/test_image_service.py b/ironic/tests/unit/common/test_image_service.py
index 197518e1a..297a1b4b9 100644
--- a/ironic/tests/unit/common/test_image_service.py
+++ b/ironic/tests/unit/common/test_image_service.py
@@ -10,7 +10,6 @@
# License for the specific language governing permissions and limitations
# under the License.
-import builtins
import datetime
from http import client as http_client
import io
@@ -483,119 +482,55 @@ class FileImageServiceTestCase(base.TestCase):
'properties': {},
'no_cache': True}, result)
+ @mock.patch.object(shutil, 'copyfile', autospec=True)
@mock.patch.object(os, 'link', autospec=True)
@mock.patch.object(os, 'remove', autospec=True)
- @mock.patch.object(os, 'access', return_value=True, autospec=True)
- @mock.patch.object(os, 'stat', autospec=True)
@mock.patch.object(image_service.FileImageService, 'validate_href',
autospec=True)
- def test_download_hard_link(self, _validate_mock, stat_mock, access_mock,
- remove_mock, link_mock):
+ def test_download_hard_link(self, _validate_mock, remove_mock, link_mock,
+ copy_mock):
_validate_mock.return_value = self.href_path
- stat_mock.return_value.st_dev = 'dev1'
file_mock = mock.Mock(spec=io.BytesIO)
file_mock.name = 'file'
self.service.download(self.href, file_mock)
_validate_mock.assert_called_once_with(mock.ANY, self.href)
- self.assertEqual(2, stat_mock.call_count)
- access_mock.assert_called_once_with(self.href_path, os.R_OK | os.W_OK)
remove_mock.assert_called_once_with('file')
link_mock.assert_called_once_with(self.href_path, 'file')
+ copy_mock.assert_not_called()
- @mock.patch.object(os, 'sendfile', return_value=42, autospec=True)
- @mock.patch.object(os.path, 'getsize', return_value=42, autospec=True)
- @mock.patch.object(builtins, 'open', autospec=True)
- @mock.patch.object(os, 'access', return_value=False, autospec=True)
- @mock.patch.object(os, 'stat', autospec=True)
- @mock.patch.object(image_service.FileImageService, 'validate_href',
- autospec=True)
- def test_download_copy(self, _validate_mock, stat_mock, access_mock,
- open_mock, size_mock, copy_mock):
- _validate_mock.return_value = self.href_path
- stat_mock.return_value.st_dev = 'dev1'
- file_mock = mock.MagicMock(spec=io.BytesIO)
- file_mock.name = 'file'
- input_mock = mock.MagicMock(spec=io.BytesIO)
- open_mock.return_value = input_mock
- self.service.download(self.href, file_mock)
- _validate_mock.assert_called_once_with(mock.ANY, self.href)
- self.assertEqual(2, stat_mock.call_count)
- access_mock.assert_called_once_with(self.href_path, os.R_OK | os.W_OK)
- copy_mock.assert_called_once_with(file_mock.fileno(),
- input_mock.__enter__().fileno(),
- 0, 42)
-
- @mock.patch.object(os, 'sendfile', autospec=True)
- @mock.patch.object(os.path, 'getsize', return_value=42, autospec=True)
- @mock.patch.object(builtins, 'open', autospec=True)
- @mock.patch.object(os, 'access', return_value=False, autospec=True)
- @mock.patch.object(os, 'stat', autospec=True)
+ @mock.patch.object(shutil, 'copyfile', autospec=True)
+ @mock.patch.object(os, 'link', autospec=True)
+ @mock.patch.object(os, 'remove', autospec=True)
@mock.patch.object(image_service.FileImageService, 'validate_href',
autospec=True)
- def test_download_copy_segmented(self, _validate_mock, stat_mock,
- access_mock, open_mock, size_mock,
- copy_mock):
- # Fake a 3G + 1k image
- chunk_size = image_service.SENDFILE_CHUNK_SIZE
- fake_image_size = chunk_size * 3 + 1024
- fake_chunk_seq = [chunk_size, chunk_size, chunk_size, 1024]
+ def test_download_copy(self, _validate_mock, remove_mock, link_mock,
+ copy_mock):
_validate_mock.return_value = self.href_path
- stat_mock.return_value.st_dev = 'dev1'
+ link_mock.side_effect = PermissionError
file_mock = mock.MagicMock(spec=io.BytesIO)
file_mock.name = 'file'
- input_mock = mock.MagicMock(spec=io.BytesIO)
- open_mock.return_value = input_mock
- size_mock.return_value = fake_image_size
- copy_mock.side_effect = fake_chunk_seq
self.service.download(self.href, file_mock)
_validate_mock.assert_called_once_with(mock.ANY, self.href)
- self.assertEqual(2, stat_mock.call_count)
- access_mock.assert_called_once_with(self.href_path, os.R_OK | os.W_OK)
- copy_calls = [mock.call(file_mock.fileno(),
- input_mock.__enter__().fileno(),
- chunk_size * i,
- fake_chunk_seq[i]) for i in range(4)]
- copy_mock.assert_has_calls(copy_calls)
- size_mock.assert_called_once_with(self.href_path)
-
- @mock.patch.object(os, 'remove', side_effect=OSError, autospec=True)
- @mock.patch.object(os, 'access', return_value=True, autospec=True)
- @mock.patch.object(os, 'stat', autospec=True)
- @mock.patch.object(image_service.FileImageService, 'validate_href',
- autospec=True)
- def test_download_hard_link_fail(self, _validate_mock, stat_mock,
- access_mock, remove_mock):
- _validate_mock.return_value = self.href_path
- stat_mock.return_value.st_dev = 'dev1'
- file_mock = mock.MagicMock(spec=io.BytesIO)
- file_mock.name = 'file'
- self.assertRaises(exception.ImageDownloadFailed,
- self.service.download, self.href, file_mock)
- _validate_mock.assert_called_once_with(mock.ANY, self.href)
- self.assertEqual(2, stat_mock.call_count)
- access_mock.assert_called_once_with(self.href_path, os.R_OK | os.W_OK)
+ link_mock.assert_called_once_with(self.href_path, 'file')
+ copy_mock.assert_called_once_with(self.href_path, 'file')
- @mock.patch.object(os, 'sendfile', side_effect=OSError, autospec=True)
- @mock.patch.object(os.path, 'getsize', return_value=42, autospec=True)
- @mock.patch.object(builtins, 'open', autospec=True)
- @mock.patch.object(os, 'access', return_value=False, autospec=True)
- @mock.patch.object(os, 'stat', autospec=True)
+ @mock.patch.object(shutil, 'copyfile', autospec=True)
+ @mock.patch.object(os, 'link', autospec=True)
+ @mock.patch.object(os, 'remove', autospec=True)
@mock.patch.object(image_service.FileImageService, 'validate_href',
autospec=True)
- def test_download_copy_fail(self, _validate_mock, stat_mock, access_mock,
- open_mock, size_mock, copy_mock):
+ def test_download_copy_fail(self, _validate_mock, remove_mock, link_mock,
+ copy_mock):
_validate_mock.return_value = self.href_path
- stat_mock.return_value.st_dev = 'dev1'
+ link_mock.side_effect = PermissionError
+ copy_mock.side_effect = PermissionError
file_mock = mock.MagicMock(spec=io.BytesIO)
file_mock.name = 'file'
- input_mock = mock.MagicMock(spec=io.BytesIO)
- open_mock.return_value = input_mock
self.assertRaises(exception.ImageDownloadFailed,
self.service.download, self.href, file_mock)
_validate_mock.assert_called_once_with(mock.ANY, self.href)
- self.assertEqual(2, stat_mock.call_count)
- access_mock.assert_called_once_with(self.href_path, os.R_OK | os.W_OK)
- size_mock.assert_called_once_with(self.href_path)
+ link_mock.assert_called_once_with(self.href_path, 'file')
+ copy_mock.assert_called_once_with(self.href_path, 'file')
class ServiceGetterTestCase(base.TestCase):
diff --git a/ironic/tests/unit/common/test_rpc_service.py b/ironic/tests/unit/common/test_rpc_service.py
index 09446ecf8..752b7665a 100644
--- a/ironic/tests/unit/common/test_rpc_service.py
+++ b/ironic/tests/unit/common/test_rpc_service.py
@@ -22,6 +22,7 @@ from oslo_utils import timeutils
from ironic.common import context
from ironic.common import rpc
from ironic.common import rpc_service
+from ironic.common import service as ironic_service
from ironic.conductor import manager
from ironic.objects import base as objects_base
from ironic.tests.unit.db import base as db_base
@@ -39,6 +40,8 @@ class TestRPCService(db_base.DbTestCase):
mgr_module = "ironic.conductor.manager"
mgr_class = "ConductorManager"
self.rpc_svc = rpc_service.RPCService(host, mgr_module, mgr_class)
+ # register oslo_service DEFAULT config options
+ ironic_service.process_launcher()
self.rpc_svc.manager.dbapi = self.dbapi
@mock.patch.object(manager.ConductorManager, 'prepare_host', autospec=True)
@@ -123,7 +126,10 @@ class TestRPCService(db_base.DbTestCase):
with mock.patch.object(self.dbapi, 'get_online_conductors',
autospec=True) as mock_cond_list:
mock_cond_list.return_value = [conductor1]
- self.rpc_svc.stop()
+ with mock.patch.object(self.dbapi, 'get_nodeinfo_list',
+ autospec=True) as mock_nodeinfo_list:
+ mock_nodeinfo_list.return_value = []
+ self.rpc_svc.stop()
# single conductor so exit immediately without waiting
mock_sleep.assert_not_called()
@@ -139,7 +145,11 @@ class TestRPCService(db_base.DbTestCase):
autospec=True) as mock_cond_list:
# multiple conductors, so wait for hash_ring_reset_interval
mock_cond_list.return_value = [conductor1, conductor2]
- self.rpc_svc.stop()
+ with mock.patch.object(self.dbapi, 'get_nodeinfo_list',
+ autospec=True) as mock_nodeinfo_list:
+ mock_nodeinfo_list.return_value = []
+ self.rpc_svc.stop()
+ mock_nodeinfo_list.assert_called_once()
# wait the total CONF.hash_ring_reset_interval 15 seconds
mock_sleep.assert_has_calls([mock.call(15)])
@@ -160,7 +170,11 @@ class TestRPCService(db_base.DbTestCase):
autospec=True) as mock_cond_list:
# multiple conductors, so wait for hash_ring_reset_interval
mock_cond_list.return_value = [conductor1, conductor2]
- self.rpc_svc.stop()
+ with mock.patch.object(self.dbapi, 'get_nodeinfo_list',
+ autospec=True) as mock_nodeinfo_list:
+ mock_nodeinfo_list.return_value = []
+ self.rpc_svc.stop()
+ mock_nodeinfo_list.assert_called_once()
# wait the remaining 10 seconds
mock_sleep.assert_has_calls([mock.call(10)])
@@ -181,7 +195,35 @@ class TestRPCService(db_base.DbTestCase):
autospec=True) as mock_cond_list:
# multiple conductors, so wait for hash_ring_reset_interval
mock_cond_list.return_value = [conductor1, conductor2]
- self.rpc_svc.stop()
+ with mock.patch.object(self.dbapi, 'get_nodeinfo_list',
+ autospec=True) as mock_nodeinfo_list:
+ mock_nodeinfo_list.return_value = []
+ self.rpc_svc.stop()
+ mock_nodeinfo_list.assert_called_once()
# no wait required, CONF.hash_ring_reset_interval already exceeded
mock_sleep.assert_not_called()
+
+ @mock.patch.object(timeutils, 'utcnow', autospec=True)
+ @mock.patch.object(time, 'sleep', autospec=True)
+ def test_stop_has_reserved(self, mock_sleep, mock_utcnow):
+ mock_utcnow.return_value = datetime.datetime(2023, 2, 2, 21, 10, 0)
+ conductor1 = db_utils.get_test_conductor(hostname='fake_host')
+ conductor2 = db_utils.get_test_conductor(hostname='other_fake_host')
+
+ with mock.patch.object(self.dbapi, 'get_online_conductors',
+ autospec=True) as mock_cond_list:
+ # multiple conductors, so wait for hash_ring_reset_interval
+ mock_cond_list.return_value = [conductor1, conductor2]
+ with mock.patch.object(self.dbapi, 'get_nodeinfo_list',
+ autospec=True) as mock_nodeinfo_list:
+ # 3 calls to manager has_reserved until all reservation locks
+ # are released
+ mock_nodeinfo_list.side_effect = [['a', 'b'], ['a'], []]
+ self.rpc_svc.stop()
+ self.assertEqual(3, mock_nodeinfo_list.call_count)
+
+ # wait the remaining 15 seconds, then wait until has_reserved
+ # returns False
+ mock_sleep.assert_has_calls(
+ [mock.call(15), mock.call(1), mock.call(1)])
diff --git a/ironic/tests/unit/conductor/test_cleaning.py b/ironic/tests/unit/conductor/test_cleaning.py
index 34e805deb..cdfbf14ee 100644
--- a/ironic/tests/unit/conductor/test_cleaning.py
+++ b/ironic/tests/unit/conductor/test_cleaning.py
@@ -436,6 +436,36 @@ class DoNodeCleanTestCase(db_base.DbTestCase):
self.assertFalse(node.maintenance)
self.assertIsNone(node.fault)
+ @mock.patch('ironic.drivers.modules.fake.FakePower.set_power_state',
+ autospec=True)
+ @mock.patch.object(n_flat.FlatNetwork, 'validate', autospec=True)
+ @mock.patch.object(conductor_steps, 'set_node_cleaning_steps',
+ autospec=True)
+ def test_do_node_clean_steps_fail_poweroff(self, mock_steps, mock_validate,
+ mock_power, clean_steps=None,
+ invalid_exc=True):
+ if invalid_exc:
+ mock_steps.side_effect = exception.InvalidParameterValue('invalid')
+ else:
+ mock_steps.side_effect = exception.NodeCleaningFailure('failure')
+ tgt_prov_state = states.MANAGEABLE if clean_steps else states.AVAILABLE
+ self.config(poweroff_in_cleanfail=True, group='conductor')
+ node = obj_utils.create_test_node(
+ self.context, driver='fake-hardware',
+ uuid=uuidutils.generate_uuid(),
+ provision_state=states.CLEANING,
+ power_state=states.POWER_ON,
+ target_provision_state=tgt_prov_state)
+ with task_manager.acquire(
+ self.context, node.uuid, shared=False) as task:
+ cleaning.do_node_clean(task, clean_steps=clean_steps)
+ mock_validate.assert_called_once_with(mock.ANY, task)
+ node.refresh()
+ self.assertEqual(states.CLEANFAIL, node.provision_state)
+ self.assertEqual(tgt_prov_state, node.target_provision_state)
+ mock_steps.assert_called_once_with(mock.ANY, disable_ramdisk=False)
+ self.assertTrue(mock_power.called)
+
def test__do_node_clean_automated_steps_fail(self):
for invalid in (True, False):
self.__do_node_clean_steps_fail(invalid_exc=invalid)
diff --git a/ironic/tests/unit/conductor/test_manager.py b/ironic/tests/unit/conductor/test_manager.py
index 7278486a5..5244e4c4b 100644
--- a/ironic/tests/unit/conductor/test_manager.py
+++ b/ironic/tests/unit/conductor/test_manager.py
@@ -5089,7 +5089,8 @@ class ManagerDoSyncPowerStateTestCase(db_base.DbTestCase):
self.power = self.driver.power
self.node = obj_utils.create_test_node(
self.context, driver='fake-hardware', maintenance=False,
- provision_state=states.AVAILABLE, instance_uuid=uuidutils.uuid)
+ provision_state=states.AVAILABLE,
+ instance_uuid=uuidutils.generate_uuid())
self.task = mock.Mock(spec_set=['context', 'driver', 'node',
'upgrade_lock', 'shared'])
self.task.context = self.context
diff --git a/ironic/tests/unit/conductor/test_utils.py b/ironic/tests/unit/conductor/test_utils.py
index 52fc72436..27c4bfa86 100644
--- a/ironic/tests/unit/conductor/test_utils.py
+++ b/ironic/tests/unit/conductor/test_utils.py
@@ -243,11 +243,12 @@ class NodePowerActionTestCase(db_base.DbTestCase):
self.config(host='my-host')
# Required for exception handling
mock_notif.__name__ = 'NodeSetPowerStateNotification'
- node = obj_utils.create_test_node(self.context,
- uuid=uuidutils.generate_uuid(),
- driver='fake-hardware',
- instance_uuid=uuidutils.uuid,
- power_state=states.POWER_OFF)
+ node = obj_utils.create_test_node(
+ self.context,
+ uuid=uuidutils.generate_uuid(),
+ driver='fake-hardware',
+ instance_uuid=uuidutils.generate_uuid(),
+ power_state=states.POWER_OFF)
task = task_manager.TaskManager(self.context, node.uuid)
get_power_mock.return_value = states.POWER_OFF
diff --git a/playbooks/metal3-ci/run.yaml b/playbooks/metal3-ci/run.yaml
index d49ddc6de..fdee0ffde 100644
--- a/playbooks/metal3-ci/run.yaml
+++ b/playbooks/metal3-ci/run.yaml
@@ -9,6 +9,8 @@
CONTROL_PLANE_MACHINE_COUNT: 1
IMAGE_OS: ubuntu
IMAGE_USERNAME: zuul
+ IRONIC_FROM_SOURCE: "true"
+ IRONIC_SOURCE: "/home/zuul/src/opendev.org/openstack/ironic"
# NOTE(dtantsur): we don't have enough resources to provision even
# a 2-node cluster, so only provision a control plane node.
NUM_NODES: 2
diff --git a/releasenotes/notes/Cleanfail-power-off-13b5fdcc2727866a.yaml b/releasenotes/notes/Cleanfail-power-off-13b5fdcc2727866a.yaml
new file mode 100644
index 000000000..2856bac06
--- /dev/null
+++ b/releasenotes/notes/Cleanfail-power-off-13b5fdcc2727866a.yaml
@@ -0,0 +1,8 @@
+---
+features:
+ - |
+ Add new conductor conf option: [conductor]poweroff_in_cleanfail
+ (default: False). when True nodes entering clean failed state
+ will be powered off. This option may be unsafe when using
+ Cleaning to perform hardware-transformative actions such as
+ firmware upgrade.
diff --git a/releasenotes/notes/cross-link-1ffd1a4958f14fd7.yaml b/releasenotes/notes/cross-link-1ffd1a4958f14fd7.yaml
new file mode 100644
index 000000000..caac13dd4
--- /dev/null
+++ b/releasenotes/notes/cross-link-1ffd1a4958f14fd7.yaml
@@ -0,0 +1,5 @@
+---
+fixes:
+ - |
+ Fixes ``Invalid cross-device link`` in some cases when using ``file://``
+ image URLs.
diff --git a/releasenotes/notes/graceful_shutdown_wait-9a62627714b86726.yaml b/releasenotes/notes/graceful_shutdown_wait-9a62627714b86726.yaml
new file mode 100644
index 000000000..778b7dc6f
--- /dev/null
+++ b/releasenotes/notes/graceful_shutdown_wait-9a62627714b86726.yaml
@@ -0,0 +1,15 @@
+---
+features:
+ - |
+ On shutdown the conductor will wait for at most
+ ``[DEFAULT]graceful_shutdown_timeout`` seconds for existing lock node
+ reservations to clear. Previously lock reservations were cleared
+ immediately, which in some cases would result in nodes going into a failed
+ state.
+upgrade:
+ - |
+ ``[DEFAULT]graceful_shutdown_timeout`` defaults to 60s. Systemd
+ ``TimeoutStopSec`` defaults to 30s. Kubernetes
+ ``terminationGracePeriodSeconds`` defaults to 90s. It is recommended to
+ align the value of ``[DEFAULT]graceful_shutdown_timeout`` with the graceful
+ timeout of the process manager of the conductor process. \ No newline at end of file
diff --git a/tools/test-setup.sh b/tools/test-setup.sh
index 16974adb5..924f0c824 100755
--- a/tools/test-setup.sh
+++ b/tools/test-setup.sh
@@ -83,5 +83,5 @@ EOF
chmod 0600 $HOME/.pgpass
# Now create our database
-psql -h 127.0.0.1 -U $DB_USER -d template1 -c "DROP DATABASE IF EXISTS openstack_citest"
+psql -h 127.0.0.1 -U $DB_USER -d postgres -c "DROP DATABASE IF EXISTS openstack_citest"
createdb -h 127.0.0.1 -U $DB_USER -l C -T template0 -E utf8 openstack_citest
diff --git a/tox.ini b/tox.ini
index 1792d81c9..1fa446d66 100644
--- a/tox.ini
+++ b/tox.ini
@@ -132,7 +132,7 @@ commands = {posargs}
# [W503] Line break before binary operator.
ignore = E129,E741,W503
filename = *.py,app.wsgi
-exclude = .venv,.git,.tox,dist,doc,*lib/python*,*egg,build
+exclude=.*,dist,doc,*lib/python*,*egg,build
import-order-style = pep8
application-import-names = ironic
max-complexity=19