diff options
22 files changed, 1127 insertions, 205 deletions
diff --git a/etc/ironic/ironic.conf.sample b/etc/ironic/ironic.conf.sample index 6d4f30401..7615fc65d 100644 --- a/etc/ironic/ironic.conf.sample +++ b/etc/ironic/ironic.conf.sample @@ -4029,11 +4029,11 @@ # 6 - <No description provided> #ip_version = 4 -# Download deploy images directly from swift using temporary -# URLs. If set to false (default), images are downloaded to -# the ironic-conductor node and served over its local HTTP -# server. Applicable only when 'ipxe_enabled' option is set to -# true. (boolean value) +# Download deploy and rescue images directly from swift using +# temporary URLs. If set to false (default), images are +# downloaded to the ironic-conductor node and served over its +# local HTTP server. Applicable only when 'ipxe_enabled' +# option is set to true. (boolean value) #ipxe_use_swift = false diff --git a/ironic/common/pxe_utils.py b/ironic/common/pxe_utils.py index 806a00cf4..292bfcf67 100644 --- a/ironic/common/pxe_utils.py +++ b/ironic/common/pxe_utils.py @@ -39,6 +39,10 @@ DHCP_BOOTFILE_NAME = '67' # rfc2132 DHCP_TFTP_SERVER_ADDRESS = '150' # rfc5859 DHCP_IPXE_ENCAP_OPTS = '175' # Tentatively Assigned DHCP_TFTP_PATH_PREFIX = '210' # rfc5071 +DEPLOY_KERNEL_RAMDISK_LABELS = ['deploy_kernel', 'deploy_ramdisk'] +RESCUE_KERNEL_RAMDISK_LABELS = ['rescue_kernel', 'rescue_ramdisk'] +KERNEL_RAMDISK_LABELS = {'deploy': DEPLOY_KERNEL_RAMDISK_LABELS, + 'rescue': RESCUE_KERNEL_RAMDISK_LABELS} def get_root_dir(): @@ -158,14 +162,25 @@ def _get_pxe_ip_address_path(ip_address, hex_form): ) -def get_deploy_kr_info(node_uuid, driver_info): - """Get href and tftp path for deploy kernel and ramdisk. +def get_kernel_ramdisk_info(node_uuid, driver_info, mode='deploy'): + """Get href and tftp path for deploy or rescue kernel and ramdisk. + + :param node_uuid: UUID of the node + :param driver_info: Node's driver_info dict + :param mode: A label to indicate whether paths for deploy or rescue + ramdisk are being requested. Supported values are 'deploy' + 'rescue'. Defaults to 'deploy', indicating deploy paths will + be returned. + :returns: a dictionary whose keys are deploy_kernel and deploy_ramdisk or + rescue_kernel and rescue_ramdisk and whose values are the + absolute paths to them. Note: driver_info should be validated outside of this method. """ root_dir = get_root_dir() image_info = {} - for label in ('deploy_kernel', 'deploy_ramdisk'): + labels = KERNEL_RAMDISK_LABELS[mode] + for label in labels: image_info[label] = ( str(driver_info[label]), os.path.join(root_dir, node_uuid, label) diff --git a/ironic/conductor/utils.py b/ironic/conductor/utils.py index e45c913cb..4ecc6776b 100644 --- a/ironic/conductor/utils.py +++ b/ironic/conductor/utils.py @@ -398,48 +398,74 @@ def cleaning_error_handler(task, msg, tear_down_cleaning=True, task.process_event('fail', target_state=target_state) -@task_manager.require_exclusive_lock -def cleanup_rescuewait_timeout(task): - """Cleanup rescue task after timeout. +def rescuing_error_handler(task, msg, set_fail_state=True): + """Cleanup rescue task after timeout or failure. :param task: a TaskManager instance. + :param msg: a message to set into node's last_error field + :param set_fail_state: a boolean flag to indicate if node needs to be + transitioned to a failed state. By default node + would be transitioned to a failed state. """ node = task.node - msg = _('Timeout reached while waiting for rescue ramdisk callback ' - 'for node') - errmsg = msg + ' %(node)s' - LOG.error(errmsg, {'node': node.uuid}) try: node_power_action(task, states.POWER_OFF) task.driver.rescue.clean_up(task) node.last_error = msg - node.save() + except exception.IronicException as e: + node.last_error = (_('Rescue operation was unsuccessful, clean up ' + 'failed for node: %(error)s') % {'error': e}) + LOG.error(('Rescue operation was unsuccessful, clean up failed for ' + 'node %(node)s: %(error)s'), + {'node': node.uuid, 'error': e}) except Exception as e: - if isinstance(e, exception.IronicException): - error_msg = _('Cleanup failed for %(node_info)s after rescue ' - 'timeout: %(error)s') - node_info = ('node') - node.last_error = error_msg % {'node_info': node_info, 'error': e} - node_info = ('node %s') % node.uuid - LOG.error(error_msg, {'node_info': node_info, 'error': e}) - else: - node.last_error = _('Rescue timed out, but an unhandled ' - 'exception was encountered while aborting. ' - 'More info may be found in the log file.') - LOG.exception('Rescue timed out for node %(node)s, an exception ' - 'was encountered while aborting. Error: %(err)s', - {'node': node.uuid, 'err': e}) + node.last_error = (_('Rescue failed, but an unhandled exception was ' + 'encountered while aborting: %(error)s') % + {'error': e}) + LOG.exception('Rescue failed for node %(node)s, an exception was ' + 'encountered while aborting.', {'node': node.uuid}) + finally: node.save() + if set_fail_state: + try: + task.process_event('fail') + except exception.InvalidState: + node = task.node + LOG.error('Internal error. Node %(node)s in provision state ' + '"%(state)s" could not transition to a failed state.', + {'node': node.uuid, 'state': node.provision_state}) + + +@task_manager.require_exclusive_lock +def cleanup_rescuewait_timeout(task): + """Cleanup rescue task after timeout. -def _spawn_error_handler(e, node, state): - """Handle spawning error for node.""" + :param task: a TaskManager instance. + """ + msg = _('Timeout reached while waiting for rescue ramdisk callback ' + 'for node') + errmsg = msg + ' %(node)s' + LOG.error(errmsg, {'node': task.node.uuid}) + rescuing_error_handler(task, msg, set_fail_state=False) + + +def _spawn_error_handler(e, node, operation): + """Handle error while trying to spawn a process. + + Handle error while trying to spawn a process to perform an + operation on a node. + + :param e: the exception object that was raised. + :param node: an Ironic node object. + :param operation: the operation being performed on the node. + """ if isinstance(e, exception.NoFreeConductorWorker): node.last_error = (_("No free conductor workers available")) node.save() LOG.warning("No free conductor workers available to perform " "%(operation)s on node %(node)s", - {'operation': state, 'node': node.uuid}) + {'operation': operation, 'node': node.uuid}) def spawn_cleaning_error_handler(e, node): diff --git a/ironic/conf/pxe.py b/ironic/conf/pxe.py index cb251d9a2..229d4e7eb 100644 --- a/ironic/conf/pxe.py +++ b/ironic/conf/pxe.py @@ -118,8 +118,8 @@ opts = [ 'Defaults to 4. EXPERIMENTAL')), cfg.BoolOpt('ipxe_use_swift', default=False, - help=_("Download deploy images directly from swift using " - "temporary URLs. " + help=_("Download deploy and rescue images directly from swift " + "using temporary URLs. " "If set to false (default), images are downloaded " "to the ironic-conductor node and served over its " "local HTTP server. " diff --git a/ironic/drivers/base.py b/ironic/drivers/base.py index ab00736ec..1c14cca72 100644 --- a/ironic/drivers/base.py +++ b/ironic/drivers/base.py @@ -419,10 +419,10 @@ class BootInterface(BaseInterface): interface_type = 'boot' @abc.abstractmethod - def prepare_ramdisk(self, task, ramdisk_params): + def prepare_ramdisk(self, task, ramdisk_params, mode='deploy'): """Prepares the boot of Ironic ramdisk. - This method prepares the boot of the deploy ramdisk after + This method prepares the boot of the deploy or rescue ramdisk after reading relevant information from the node's database. :param task: a task from TaskManager. @@ -436,17 +436,25 @@ class BootInterface(BaseInterface): Other implementations can make use of ramdisk_params to pass such information. Different implementations of boot interface will have different ways of passing parameters to the ramdisk. + :param mode: Label indicating a deploy or rescue operation + being carried out on the node. Supported values are 'deploy' and + 'rescue'. Defaults to 'deploy', indicating deploy operation is + being carried out. :returns: None """ @abc.abstractmethod - def clean_up_ramdisk(self, task): + def clean_up_ramdisk(self, task, mode='deploy'): """Cleans up the boot of ironic ramdisk. This method cleans up the environment that was setup for booting the - deploy ramdisk. + deploy or rescue ramdisk. :param task: a task from TaskManager. + :param mode: Label indicating a deploy or rescue operation + was carried out on the node. Supported values are 'deploy' and + 'rescue'. Defaults to 'deploy', indicating deploy operation was + carried out. :returns: None """ diff --git a/ironic/drivers/generic.py b/ironic/drivers/generic.py index a651ac667..5292ec079 100644 --- a/ironic/drivers/generic.py +++ b/ironic/drivers/generic.py @@ -34,8 +34,7 @@ from ironic.drivers.modules.storage import noop as noop_storage class GenericHardware(hardware_type.AbstractHardwareType): """Abstract base class representing generic hardware. - This class provides reasonable defaults for boot, deploy, inspect, network - and raid interfaces. + This class provides reasonable defaults for all of the interfaces. """ @property @@ -70,6 +69,13 @@ class GenericHardware(hardware_type.AbstractHardwareType): return [noop.NoRAID, agent.AgentRAID] @property + def supported_rescue_interfaces(self): + """List of supported rescue interfaces.""" + # AgentRescue requires IPA with the rescue extension enabled, so + # NoRescue is the default + return [noop.NoRescue, agent.AgentRescue] + + @property def supported_storage_interfaces(self): """List of supported storage interfaces.""" return [noop_storage.NoopStorage, cinder.CinderStorage] diff --git a/ironic/drivers/modules/agent.py b/ironic/drivers/modules/agent.py index e4eeb84a3..bb147bd1b 100644 --- a/ironic/drivers/modules/agent.py +++ b/ironic/drivers/modules/agent.py @@ -15,6 +15,7 @@ from ironic_lib import metrics_utils from ironic_lib import utils as il_utils from oslo_log import log +from oslo_utils import reflection from oslo_utils import units import six.moves.urllib_parse as urlparse @@ -32,6 +33,7 @@ from ironic.conf import CONF from ironic.drivers import base from ironic.drivers.modules import agent_base_vendor from ironic.drivers.modules import deploy_utils +from ironic.drivers.modules.network import neutron LOG = log.getLogger(__name__) @@ -58,6 +60,14 @@ OPTIONAL_PROPERTIES = { '``image_https_proxy`` are not specified. Optional.'), } +RESCUE_PROPERTIES = { + 'rescue_kernel': _('UUID (from Glance) of the rescue kernel. This value ' + 'is required for rescue mode.'), + 'rescue_ramdisk': _('UUID (from Glance) of the rescue ramdisk with agent ' + 'that is used at node rescue time. This value is ' + 'required for rescue mode.'), +} + COMMON_PROPERTIES = REQUIRED_PROPERTIES.copy() COMMON_PROPERTIES.update(OPTIONAL_PROPERTIES) COMMON_PROPERTIES.update(agent_base_vendor.VENDOR_PROPERTIES) @@ -460,13 +470,15 @@ class AgentDeploy(AgentDeployMixin, base.DeployInterface): # backend storage system, and we can return to the caller # as we do not need to boot the agent to deploy. return - if node.provision_state == states.ACTIVE: + if node.provision_state in (states.ACTIVE, states.UNRESCUING): # Call is due to conductor takeover task.driver.boot.prepare_instance(task) elif node.provision_state != states.ADOPTING: - node.instance_info = deploy_utils.build_instance_info_for_deploy( - task) - node.save() + if node.provision_state not in (states.RESCUING, states.RESCUEWAIT, + states.RESCUE, states.RESCUEFAIL): + node.instance_info = ( + deploy_utils.build_instance_info_for_deploy(task)) + node.save() if CONF.agent.manage_agent_boot: deploy_opts = deploy_utils.build_agent_options(node) task.driver.boot.prepare_ramdisk(task, deploy_opts) @@ -693,3 +705,133 @@ class AgentRAID(base.RAIDInterface): """ task.node.raid_config = {} task.node.save() + + +class AgentRescue(base.RescueInterface): + """Implementation of RescueInterface which uses agent ramdisk.""" + + def get_properties(self): + """Return the properties of the interface. + + :returns: dictionary of <property name>:<property description> entries. + """ + return RESCUE_PROPERTIES.copy() + + @METRICS.timer('AgentRescue.rescue') + @task_manager.require_exclusive_lock + def rescue(self, task): + """Boot a rescue ramdisk on the node. + + :param task: a TaskManager instance. + :raises: NetworkError if the tenant ports cannot be removed. + :raises: InvalidParameterValue when the wrong power state is specified + or the wrong driver info is specified for power management. + :raises: other exceptions by the node's power driver if something + wrong occurred during the power action. + :raises: any boot interface's prepare_ramdisk exceptions. + :returns: Returns states.RESCUEWAIT + """ + manager_utils.node_power_action(task, states.POWER_OFF) + task.driver.boot.clean_up_instance(task) + task.driver.network.unconfigure_tenant_networks(task) + task.driver.network.add_rescuing_network(task) + if CONF.agent.manage_agent_boot: + ramdisk_opts = deploy_utils.build_agent_options(task.node) + # prepare_ramdisk will set the boot device + task.driver.boot.prepare_ramdisk(task, ramdisk_opts, mode='rescue') + manager_utils.node_power_action(task, states.POWER_ON) + + return states.RESCUEWAIT + + @METRICS.timer('AgentRescue.unrescue') + @task_manager.require_exclusive_lock + def unrescue(self, task): + """Attempt to move a rescued node back to active state. + + :param task: a TaskManager instance. + :raises: NetworkError if the rescue ports cannot be removed. + :raises: InvalidParameterValue when the wrong power state is specified + or the wrong driver info is specified for power management. + :raises: other exceptions by the node's power driver if something + wrong occurred during the power action. + :raises: any boot interface's prepare_instance exceptions. + :returns: Returns states.ACTIVE + """ + manager_utils.node_power_action(task, states.POWER_OFF) + self.clean_up(task) + task.driver.network.configure_tenant_networks(task) + task.driver.boot.prepare_instance(task) + manager_utils.node_power_action(task, states.POWER_ON) + + return states.ACTIVE + + @METRICS.timer('AgentRescue.validate') + def validate(self, task): + """Validate that the node has required properties for agent rescue. + + :param task: a TaskManager instance with the node being checked + :raises: InvalidParameterValue if 'instance_info/rescue_password' has + empty password or rescuing network UUID config option + has an invalid value when 'neutron' network is used. + :raises: MissingParameterValue if node is missing one or more required + parameters + :raises: IncompatibleInterface if 'prepare_ramdisk' and + 'clean_up_ramdisk' of node's boot interface do not support 'mode' + argument. + """ + node = task.node + missing_params = [] + + # Validate rescuing network if node is using 'neutron' network + if isinstance(task.driver.network, neutron.NeutronNetwork): + task.driver.network.get_rescuing_network_uuid(task) + + if CONF.agent.manage_agent_boot: + if ('mode' not in reflection.get_signature( + task.driver.boot.prepare_ramdisk).parameters or + 'mode' not in reflection.get_signature( + task.driver.boot.clean_up_ramdisk).parameters): + raise exception.IncompatibleInterface( + interface_type='boot', + interface_impl="of 'prepare_ramdisk' and/or " + "'clean_up_ramdisk' with 'mode' argument", + hardware_type=node.driver) + # TODO(stendulker): boot.validate() performs validation of + # provisioning related parameters which is not required during + # rescue operation. + task.driver.boot.validate(task) + for req in RESCUE_PROPERTIES: + if node.driver_info.get(req) is None: + missing_params.append('driver_info/' + req) + + rescue_pass = node.instance_info.get('rescue_password') + if rescue_pass is None: + missing_params.append('instance_info/rescue_password') + + if missing_params: + msg = _('Node %(node)s is missing parameter(s): ' + '%(params)s. These are required for rescuing node.') + raise exception.MissingParameterValue( + msg % {'node': node.uuid, + 'params': ', '.join(missing_params)}) + + if not rescue_pass.strip(): + msg = (_("The 'instance_info/rescue_password' is an empty string " + "for node %s. The 'rescue_password' must be a non-empty " + "string value.") % node.uuid) + raise exception.InvalidParameterValue(msg) + + @METRICS.timer('AgentRescue.clean_up') + def clean_up(self, task): + """Clean up after RESCUEWAIT timeout/failure or finishing rescue. + + Rescue password should be removed from the node and ramdisk boot + environment should be cleaned if Ironic is managing the ramdisk boot. + + :param task: a TaskManager instance with the node. + :raises: NetworkError if the rescue ports cannot be removed. + """ + manager_utils.remove_node_rescue_password(task.node, save=True) + if CONF.agent.manage_agent_boot: + task.driver.boot.clean_up_ramdisk(task, mode='rescue') + task.driver.network.remove_rescuing_network(task) diff --git a/ironic/drivers/modules/agent_base_vendor.py b/ironic/drivers/modules/agent_base_vendor.py index 482c7f1ad..3b927c6da 100644 --- a/ironic/drivers/modules/agent_base_vendor.py +++ b/ironic/drivers/modules/agent_base_vendor.py @@ -269,7 +269,7 @@ class HeartbeatMixin(object): @property def heartbeat_allowed_states(self): """Define node states where heartbeating is allowed""" - return (states.DEPLOYWAIT, states.CLEANWAIT) + return (states.DEPLOYWAIT, states.CLEANWAIT, states.RESCUEWAIT) @METRICS.timer('HeartbeatMixin.heartbeat') def heartbeat(self, task, callback_url, agent_version): @@ -334,17 +334,50 @@ class HeartbeatMixin(object): else: msg = _('Node failed to check cleaning progress.') self.continue_cleaning(task) - + elif (node.provision_state == states.RESCUEWAIT): + msg = _('Node failed to perform rescue operation.') + self._finalize_rescue(task) except Exception as e: - err_info = {'node': node.uuid, 'msg': msg, 'e': e} - last_error = _('Asynchronous exception for node %(node)s: ' - '%(msg)s Exception: %(e)s') % err_info - LOG.exception(last_error) + err_info = {'msg': msg, 'e': e} + last_error = _('Asynchronous exception: %(msg)s ' + 'Exception: %(e)s for node') % err_info + errmsg = last_error + ' %(node)s' + LOG.exception(errmsg, {'node': node.uuid}) if node.provision_state in (states.CLEANING, states.CLEANWAIT): manager_utils.cleaning_error_handler(task, last_error) elif node.provision_state in (states.DEPLOYING, states.DEPLOYWAIT): deploy_utils.set_failed_state( task, last_error, collect_logs=bool(self._client)) + elif node.provision_state in (states.RESCUING, states.RESCUEWAIT): + manager_utils.rescuing_error_handler(task, last_error) + + def _finalize_rescue(self, task): + """Call ramdisk to prepare rescue mode and verify result. + + :param task: A TaskManager instance + :raises: InstanceRescueFailure, if rescuing failed + """ + node = task.node + try: + result = self._client.finalize_rescue(node) + except exception.IronicException as e: + raise exception.InstanceRescueFailure(node=node.uuid, + instance=node.instance_uuid, + reason=e) + if ((not result.get('command_status')) or + result.get('command_status') != 'SUCCEEDED'): + # NOTE(mariojv) Caller will clean up failed rescue in exception + # handler. + fail_reason = (_('Agent returned bad result for command ' + 'finalize_rescue: %(result)s') % + {'result': result.get('command_error')}) + raise exception.InstanceRescueFailure(node=node.uuid, + instance=node.instance_uuid, + reason=fail_reason) + task.process_event('resume') + task.driver.rescue.clean_up(task) + task.driver.network.configure_tenant_networks(task) + task.process_event('done') class AgentDeployMixin(HeartbeatMixin): diff --git a/ironic/drivers/modules/agent_client.py b/ironic/drivers/modules/agent_client.py index 6db77be5a..3ffa7d081 100644 --- a/ironic/drivers/modules/agent_client.py +++ b/ironic/drivers/modules/agent_client.py @@ -213,3 +213,16 @@ class AgentClient(object): method='log.collect_system_logs', params={}, wait=True) + + @METRICS.timer('AgentClient.finalize_rescue') + def finalize_rescue(self, node): + """Instruct the ramdisk to finalize entering of rescue mode.""" + rescue_pass = node.instance_info.get('rescue_password') + if not rescue_pass: + raise exception.IronicException(_('Agent rescue requires ' + 'rescue_password in ' + 'instance_info')) + params = {'rescue_password': rescue_pass} + return self._command(node=node, + method='rescue.finalize_rescue', + params=params) diff --git a/ironic/drivers/modules/deploy_utils.py b/ironic/drivers/modules/deploy_utils.py index d42a4174e..250aa738e 100644 --- a/ironic/drivers/modules/deploy_utils.py +++ b/ironic/drivers/modules/deploy_utils.py @@ -65,6 +65,10 @@ SUPPORTED_CAPABILITIES = { 'disk_label': ('msdos', 'gpt'), } +# States related to rescue mode. +RESCUE_LIKE_STATES = (states.RESCUING, states.RESCUEWAIT, states.RESCUEFAIL, + states.UNRESCUING, states.UNRESCUEFAIL) + DISK_LAYOUT_PARAMS = ('root_gb', 'swap_mb', 'ephemeral_gb') diff --git a/ironic/drivers/modules/fake.py b/ironic/drivers/modules/fake.py index f60c55258..816cc730e 100644 --- a/ironic/drivers/modules/fake.py +++ b/ironic/drivers/modules/fake.py @@ -79,10 +79,10 @@ class FakeBoot(base.BootInterface): def validate(self, task): pass - def prepare_ramdisk(self, task, ramdisk_params): + def prepare_ramdisk(self, task, ramdisk_params, mode='deploy'): pass - def clean_up_ramdisk(self, task): + def clean_up_ramdisk(self, task, mode='deploy'): pass def prepare_instance(self, task): diff --git a/ironic/drivers/modules/pxe.py b/ironic/drivers/modules/pxe.py index 76cc5f5ac..8a820a6ea 100644 --- a/ironic/drivers/modules/pxe.py +++ b/ironic/drivers/modules/pxe.py @@ -59,19 +59,26 @@ COMMON_PROPERTIES = REQUIRED_PROPERTIES.copy() COMMON_PROPERTIES.update(OPTIONAL_PROPERTIES) -def _parse_driver_info(node): +def _parse_driver_info(node, mode='deploy'): """Gets the driver specific Node deployment info. This method validates whether the 'driver_info' property of the supplied node contains the required information for this driver to - deploy images to the node. + deploy images to, or rescue, the node. :param node: a single Node. + :param mode: Label indicating a deploy or rescue operation being + carried out on the node. Supported values are + 'deploy' and 'rescue'. Defaults to 'deploy', indicating + deploy operation is being carried out. :returns: A dict with the driver_info values. :raises: MissingParameterValue """ info = node.driver_info - d_info = {k: info.get(k) for k in ('deploy_kernel', 'deploy_ramdisk')} + + params_to_check = pxe_utils.KERNEL_RAMDISK_LABELS[mode] + + d_info = {k: info.get(k) for k in params_to_check} error_msg = _("Cannot validate PXE bootloader. Some parameters were" " missing in node's driver_info") deploy_utils.check_for_missing_params(d_info, error_msg) @@ -121,29 +128,37 @@ def _get_instance_image_info(node, ctx): return image_info -def _get_deploy_image_info(node): - """Generate the paths for TFTP files for deploy images. +def _get_image_info(node, mode='deploy'): + """Generate the paths for TFTP files for deploy or rescue images. - This method generates the paths for the deploy kernel and - deploy ramdisk. + This method generates the paths for the deploy (or rescue) kernel and + deploy (or rescue) ramdisk. :param node: a node object - :returns: a dictionary whose keys are the names of the images ( - deploy_kernel, deploy_ramdisk) and values are the absolute - paths of them. - :raises: MissingParameterValue, if deploy_kernel/deploy_ramdisk is - missing in node's driver_info. + :param mode: Label indicating a deploy or rescue operation being + carried out on the node. Supported values are 'deploy' and 'rescue'. + Defaults to 'deploy', indicating deploy operation is being carried out. + :returns: a dictionary whose keys are the names of the images + (deploy_kernel, deploy_ramdisk, or rescue_kernel, rescue_ramdisk) and + values are the absolute paths of them. + :raises: MissingParameterValue, if deploy_kernel/deploy_ramdisk or + rescue_kernel/rescue_ramdisk is missing in node's driver_info. """ - d_info = _parse_driver_info(node) - return pxe_utils.get_deploy_kr_info(node.uuid, d_info) + d_info = _parse_driver_info(node, mode=mode) + + return pxe_utils.get_kernel_ramdisk_info( + node.uuid, d_info, mode=mode) -def _build_deploy_pxe_options(task, pxe_info): +def _build_deploy_pxe_options(task, pxe_info, mode='deploy'): pxe_opts = {} node = task.node - for label, option in (('deploy_kernel', 'deployment_aki_path'), - ('deploy_ramdisk', 'deployment_ari_path')): + kernel_label = '%s_kernel' % mode + ramdisk_label = '%s_ramdisk' % mode + + for label, option in ((kernel_label, 'deployment_aki_path'), + (ramdisk_label, 'deployment_ari_path')): if CONF.pxe.ipxe_enabled: image_href = pxe_info[label][0] if (CONF.pxe.ipxe_use_swift and @@ -218,20 +233,25 @@ def _build_pxe_config_options(task, pxe_info, service=False): :returns: A dictionary of pxe options to be used in the pxe bootfile template. """ + node = task.node + mode = ('rescue' if node.provision_state in deploy_utils.RESCUE_LIKE_STATES + else 'deploy') if service: pxe_options = {} - elif (task.node.driver_internal_info.get('boot_from_volume') and + elif (node.driver_internal_info.get('boot_from_volume') and CONF.pxe.ipxe_enabled): pxe_options = _get_volume_pxe_options(task) else: - pxe_options = _build_deploy_pxe_options(task, pxe_info) - - # NOTE(pas-ha) we still must always add user image kernel and ramdisk info - # as later during switching PXE config to service mode the template - # will not be regenerated anew, but instead edited as-is. - # This can be changed later if/when switching PXE config will also use - # proper templating instead of editing existing files on disk. - pxe_options.update(_build_instance_pxe_options(task, pxe_info)) + pxe_options = _build_deploy_pxe_options(task, pxe_info, mode=mode) + + if mode == 'deploy': + # NOTE(pas-ha) we still must always add user image kernel and ramdisk + # info as later during switching PXE config to service mode the + # template will not be regenerated anew, but instead edited as-is. + # This can be changed later if/when switching PXE config will also use + # proper templating instead of editing existing files on disk. + pxe_options.update(_build_instance_pxe_options(task, pxe_info)) + pxe_options.update(_build_extra_pxe_options()) return pxe_options @@ -241,10 +261,10 @@ def _build_service_pxe_config(task, instance_image_info, root_uuid_or_disk_id): node = task.node pxe_config_path = pxe_utils.get_pxe_config_file_path(node.uuid) - # NOTE(pas-ha) if it is takeover of ACTIVE node, - # first ensure that basic PXE configs and links + # NOTE(pas-ha) if it is takeover of ACTIVE node or node performing + # unrescue operation, first ensure that basic PXE configs and links # are in place before switching pxe config - if (node.provision_state == states.ACTIVE and + if (node.provision_state in [states.ACTIVE, states.UNRESCUING] and not os.path.isfile(pxe_config_path)): pxe_options = _build_pxe_config_options(task, instance_image_info, service=True) @@ -435,7 +455,7 @@ class PXEBoot(base.BootInterface): _parse_driver_info(node) # NOTE(TheJulia): If we're not writing an image, we can skip # the remainder of this method. - if not task.driver.storage.should_write_image(task): + if (not task.driver.storage.should_write_image(task)): return d_info = deploy_utils.get_image_instance_info(node) @@ -449,17 +469,21 @@ class PXEBoot(base.BootInterface): deploy_utils.validate_image_properties(task.context, d_info, props) @METRICS.timer('PXEBoot.prepare_ramdisk') - def prepare_ramdisk(self, task, ramdisk_params): + def prepare_ramdisk(self, task, ramdisk_params, mode='deploy'): """Prepares the boot of Ironic ramdisk using PXE. - This method prepares the boot of the deploy kernel/ramdisk after - reading relevant information from the node's driver_info and + This method prepares the boot of the deploy or rescue kernel/ramdisk + after reading relevant information from the node's driver_info and instance_info. :param task: a task from TaskManager. :param ramdisk_params: the parameters to be passed to the ramdisk. pxe driver passes these parameters as kernel command-line arguments. + :param mode: Label indicating a deploy or rescue operation + being carried out on the node. Supported values are + 'deploy' and 'rescue'. Defaults to 'deploy', indicating + deploy operation is being carried out. :returns: None :raises: MissingParameterValue, if some information is missing in node's driver_info or instance_info. @@ -482,7 +506,7 @@ class PXEBoot(base.BootInterface): provider = dhcp_factory.DHCPFactory() provider.update_dhcp(task, dhcp_opts) - pxe_info = _get_deploy_image_info(node) + pxe_info = _get_image_info(node, mode=mode) # NODE: Try to validate and fetch instance images only # if we are in DEPLOYING state. @@ -503,29 +527,37 @@ class PXEBoot(base.BootInterface): persistent=persistent) if CONF.pxe.ipxe_enabled and CONF.pxe.ipxe_use_swift: - pxe_info.pop('deploy_kernel', None) - pxe_info.pop('deploy_ramdisk', None) + kernel_label = '%s_kernel' % mode + ramdisk_label = '%s_ramdisk' % mode + pxe_info.pop(kernel_label, None) + pxe_info.pop(ramdisk_label, None) + if pxe_info: _cache_ramdisk_kernel(task.context, node, pxe_info) @METRICS.timer('PXEBoot.clean_up_ramdisk') - def clean_up_ramdisk(self, task): + def clean_up_ramdisk(self, task, mode='deploy'): """Cleans up the boot of ironic ramdisk. This method cleans up the PXE environment that was setup for booting - the deploy ramdisk. It unlinks the deploy kernel/ramdisk in the node's - directory in tftproot and removes it's PXE config. + the deploy or rescue ramdisk. It unlinks the deploy/rescue + kernel/ramdisk in the node's directory in tftproot and removes it's PXE + config. :param task: a task from TaskManager. + :param mode: Label indicating a deploy or rescue operation + was carried out on the node. Supported values are 'deploy' and + 'rescue'. Defaults to 'deploy', indicating deploy operation was + carried out. :returns: None """ node = task.node try: - images_info = _get_deploy_image_info(node) + images_info = _get_image_info(node, mode=mode) except exception.MissingParameterValue as e: - LOG.warning('Could not get deploy image info ' + LOG.warning('Could not get %(mode)s image info ' 'to clean up images for node %(node)s: %(err)s', - {'node': node.uuid, 'err': e}) + {'mode': mode, 'node': node.uuid, 'err': e}) else: _clean_up_pxe_env(task, images_info) diff --git a/ironic/tests/unit/common/test_pxe_utils.py b/ironic/tests/unit/common/test_pxe_utils.py index 8ca347dc4..9b16addc0 100644 --- a/ironic/tests/unit/common/test_pxe_utils.py +++ b/ironic/tests/unit/common/test_pxe_utils.py @@ -646,43 +646,53 @@ class TestPXEUtils(db_base.DbTestCase): def test_dhcp_options_for_instance_ipv6(self): self._dhcp_options_for_instance(ip_version=6) - def _test_get_deploy_kr_info(self, expected_dir): + def _test_get_kernel_ramdisk_info(self, expected_dir, mode='deploy'): node_uuid = 'fake-node' - driver_info = { - 'deploy_kernel': 'glance://deploy-kernel', - 'deploy_ramdisk': 'glance://deploy-ramdisk', - } - expected = { - 'deploy_kernel': ('glance://deploy-kernel', - expected_dir + '/fake-node/deploy_kernel'), - 'deploy_ramdisk': ('glance://deploy-ramdisk', - expected_dir + '/fake-node/deploy_ramdisk'), + driver_info = { + '%s_kernel' % mode: 'glance://%s-kernel' % mode, + '%s_ramdisk' % mode: 'glance://%s-ramdisk' % mode, } - kr_info = pxe_utils.get_deploy_kr_info(node_uuid, driver_info) + expected = {} + for k, v in driver_info.items(): + expected[k] = (v, expected_dir + '/fake-node/%s' % k) + kr_info = pxe_utils.get_kernel_ramdisk_info(node_uuid, + driver_info, + mode=mode) self.assertEqual(expected, kr_info) - def test_get_deploy_kr_info(self): + def test_get_kernel_ramdisk_info(self): expected_dir = '/tftp' self.config(tftp_root=expected_dir, group='pxe') - self._test_get_deploy_kr_info(expected_dir) + self._test_get_kernel_ramdisk_info(expected_dir) - def test_get_deploy_kr_info_ipxe(self): + def test_get_kernel_ramdisk_info_ipxe(self): expected_dir = '/http' self.config(ipxe_enabled=True, group='pxe') self.config(http_root=expected_dir, group='deploy') - self._test_get_deploy_kr_info(expected_dir) + self._test_get_kernel_ramdisk_info(expected_dir) - def test_get_deploy_kr_info_bad_driver_info(self): + def test_get_kernel_ramdisk_info_bad_driver_info(self): self.config(tftp_root='/tftp', group='pxe') node_uuid = 'fake-node' driver_info = {} self.assertRaises(KeyError, - pxe_utils.get_deploy_kr_info, + pxe_utils.get_kernel_ramdisk_info, node_uuid, driver_info) + def test_get_rescue_kr_info(self): + expected_dir = '/tftp' + self.config(tftp_root=expected_dir, group='pxe') + self._test_get_kernel_ramdisk_info(expected_dir, mode='rescue') + + def test_get_rescue_kr_info_ipxe(self): + expected_dir = '/http' + self.config(ipxe_enabled=True, group='pxe') + self.config(http_root=expected_dir, group='deploy') + self._test_get_kernel_ramdisk_info(expected_dir, mode='rescue') + def _dhcp_options_for_instance_ipxe(self, task, boot_file): self.config(tftp_server='192.0.2.1', group='pxe') self.config(ipxe_enabled=True, group='pxe') diff --git a/ironic/tests/unit/conductor/test_utils.py b/ironic/tests/unit/conductor/test_utils.py index 9eb75e6db..711e42fdf 100644 --- a/ironic/tests/unit/conductor/test_utils.py +++ b/ironic/tests/unit/conductor/test_utils.py @@ -1271,10 +1271,94 @@ class ErrorHandlersTestCase(tests_base.TestCase): self.assertTrue(log_mock.error.called) node_power_mock.assert_called_once_with(mock.ANY, states.POWER_OFF) self.task.driver.rescue.clean_up.assert_called_once_with(self.task) - self.assertIn('Rescue timed out', self.node.last_error) + self.assertIn('Rescue failed', self.node.last_error) self.node.save.assert_called_once_with() self.assertTrue(log_mock.exception.called) + @mock.patch.object(conductor_utils, 'node_power_action') + def _test_rescuing_error_handler(self, node_power_mock, + set_state=True): + self.node.provision_state = states.RESCUEWAIT + conductor_utils.rescuing_error_handler(self.task, + 'some exception for node', + set_fail_state=set_state) + node_power_mock.assert_called_once_with(mock.ANY, states.POWER_OFF) + self.task.driver.rescue.clean_up.assert_called_once_with(self.task) + self.node.save.assert_called_once_with() + if set_state: + self.assertTrue(self.task.process_event.called) + else: + self.assertFalse(self.task.process_event.called) + + def test_rescuing_error_handler(self): + self._test_rescuing_error_handler() + + def test_rescuing_error_handler_set_failed_state_false(self): + self._test_rescuing_error_handler(set_state=False) + + @mock.patch.object(conductor_utils.LOG, 'error') + @mock.patch.object(conductor_utils, 'node_power_action') + def test_rescuing_error_handler_ironic_exc(self, node_power_mock, + log_mock): + self.node.provision_state = states.RESCUEWAIT + expected_exc = exception.IronicException('moocow') + clean_up_mock = self.task.driver.rescue.clean_up + clean_up_mock.side_effect = expected_exc + conductor_utils.rescuing_error_handler(self.task, + 'some exception for node') + node_power_mock.assert_called_once_with(mock.ANY, states.POWER_OFF) + self.task.driver.rescue.clean_up.assert_called_once_with(self.task) + log_mock.assert_called_once_with('Rescue operation was unsuccessful, ' + 'clean up failed for node %(node)s: ' + '%(error)s', + {'node': self.node.uuid, + 'error': expected_exc}) + self.node.save.assert_called_once_with() + + @mock.patch.object(conductor_utils.LOG, 'exception') + @mock.patch.object(conductor_utils, 'node_power_action') + def test_rescuing_error_handler_other_exc(self, node_power_mock, + log_mock): + self.node.provision_state = states.RESCUEWAIT + expected_exc = RuntimeError() + clean_up_mock = self.task.driver.rescue.clean_up + clean_up_mock.side_effect = expected_exc + conductor_utils.rescuing_error_handler(self.task, + 'some exception for node') + node_power_mock.assert_called_once_with(mock.ANY, states.POWER_OFF) + self.task.driver.rescue.clean_up.assert_called_once_with(self.task) + log_mock.assert_called_once_with('Rescue failed for node ' + '%(node)s, an exception was ' + 'encountered while aborting.', + {'node': self.node.uuid}) + self.node.save.assert_called_once_with() + + @mock.patch.object(conductor_utils.LOG, 'error') + @mock.patch.object(conductor_utils, 'node_power_action') + def test_rescuing_error_handler_bad_state(self, node_power_mock, + log_mock): + self.node.provision_state = states.RESCUE + self.task.process_event.side_effect = exception.InvalidState + expected_exc = exception.IronicException('moocow') + clean_up_mock = self.task.driver.rescue.clean_up + clean_up_mock.side_effect = expected_exc + conductor_utils.rescuing_error_handler(self.task, + 'some exception for node') + node_power_mock.assert_called_once_with(mock.ANY, states.POWER_OFF) + self.task.driver.rescue.clean_up.assert_called_once_with(self.task) + self.task.process_event.assert_called_once_with('fail') + log_calls = [mock.call('Rescue operation was unsuccessful, clean up ' + 'failed for node %(node)s: %(error)s', + {'node': self.node.uuid, + 'error': expected_exc}), + mock.call('Internal error. Node %(node)s in provision ' + 'state "%(state)s" could not transition to a ' + 'failed state.', + {'node': self.node.uuid, + 'state': self.node.provision_state})] + log_mock.assert_has_calls(log_calls) + self.node.save.assert_called_once_with() + class ValidatePortPhysnetTestCase(db_base.DbTestCase): diff --git a/ironic/tests/unit/db/utils.py b/ironic/tests/unit/db/utils.py index 511ad125e..5aea3fccd 100644 --- a/ironic/tests/unit/db/utils.py +++ b/ironic/tests/unit/db/utils.py @@ -53,6 +53,8 @@ def get_test_pxe_driver_info(): return { "deploy_kernel": "glance://deploy_kernel_uuid", "deploy_ramdisk": "glance://deploy_ramdisk_uuid", + "rescue_kernel": "glance://rescue_kernel_uuid", + "rescue_ramdisk": "glance://rescue_ramdisk_uuid" } @@ -66,6 +68,7 @@ def get_test_pxe_instance_info(): return { "image_source": "glance://image_uuid", "root_gb": 100, + "rescue_password": "password" } diff --git a/ironic/tests/unit/drivers/modules/test_agent.py b/ironic/tests/unit/drivers/modules/test_agent.py index b29e9e1d3..98e3397b3 100644 --- a/ironic/tests/unit/drivers/modules/test_agent.py +++ b/ironic/tests/unit/drivers/modules/test_agent.py @@ -16,14 +16,17 @@ import types import mock from oslo_config import cfg +from oslo_utils import reflection from ironic.common import dhcp_factory from ironic.common import exception from ironic.common import images +from ironic.common import neutron as neutron_common from ironic.common import raid from ironic.common import states from ironic.conductor import task_manager from ironic.conductor import utils as manager_utils +from ironic.drivers import base as drivers_base from ironic.drivers.modules import agent from ironic.drivers.modules import agent_base_vendor from ironic.drivers.modules import agent_client @@ -388,23 +391,48 @@ class TestAgentDeploy(db_base.DbTestCase): self.node.refresh() self.assertEqual('bar', self.node.instance_info['foo']) + @mock.patch.object(pxe.PXEBoot, 'prepare_ramdisk') + @mock.patch.object(deploy_utils, 'build_agent_options') + @mock.patch.object(deploy_utils, 'build_instance_info_for_deploy') + def _test_prepare_rescue_states( + self, build_instance_info_mock, build_options_mock, + pxe_prepare_ramdisk_mock, prov_state): + with task_manager.acquire( + self.context, self.node['uuid'], shared=False) as task: + task.node.provision_state = prov_state + build_options_mock.return_value = {'a': 'b'} + self.driver.prepare(task) + self.assertFalse(build_instance_info_mock.called) + build_options_mock.assert_called_once_with(task.node) + pxe_prepare_ramdisk_mock.assert_called_once_with( + task, {'a': 'b'}) + + def test_prepare_rescue_states(self): + for state in (states.RESCUING, states.RESCUEWAIT, + states.RESCUE, states.RESCUEFAIL): + self._test_prepare_rescue_states(prov_state=state) + @mock.patch.object(noop_storage.NoopStorage, 'attach_volumes', autospec=True) @mock.patch.object(deploy_utils, 'populate_storage_driver_internal_info') @mock.patch.object(flat_network.FlatNetwork, 'add_provisioning_network', spec_set=True, autospec=True) - @mock.patch.object(pxe.PXEBoot, 'prepare_instance') - @mock.patch.object(pxe.PXEBoot, 'prepare_ramdisk') - @mock.patch.object(deploy_utils, 'build_agent_options') - @mock.patch.object(deploy_utils, 'build_instance_info_for_deploy') - def test_prepare_active( + @mock.patch.object(pxe.PXEBoot, 'prepare_instance', + spec_set=True, autospec=True) + @mock.patch.object(pxe.PXEBoot, 'prepare_ramdisk', + spec_set=True, autospec=True) + @mock.patch.object(deploy_utils, 'build_agent_options', + spec_set=True, autospec=True) + @mock.patch.object(deploy_utils, 'build_instance_info_for_deploy', + spec_set=True, autospec=True) + def _test_prepare_conductor_takeover( self, build_instance_info_mock, build_options_mock, pxe_prepare_ramdisk_mock, pxe_prepare_instance_mock, add_provisioning_net_mock, storage_driver_info_mock, - storage_attach_volumes_mock): + storage_attach_volumes_mock, prov_state): with task_manager.acquire( self.context, self.node['uuid'], shared=False) as task: - task.node.provision_state = states.ACTIVE + task.node.provision_state = prov_state self.driver.prepare(task) @@ -416,6 +444,11 @@ class TestAgentDeploy(db_base.DbTestCase): self.assertTrue(storage_driver_info_mock.called) self.assertFalse(storage_attach_volumes_mock.called) + def test_prepare_active_and_unrescue_states(self): + for prov_state in (states.ACTIVE, states.UNRESCUING): + self._test_prepare_conductor_takeover( + prov_state=prov_state) + @mock.patch.object(noop_storage.NoopStorage, 'should_write_image', autospec=True) @mock.patch.object(noop_storage.NoopStorage, 'attach_volumes', @@ -1193,3 +1226,266 @@ class AgentRAIDTestCase(db_base.DbTestCase): self.node.refresh() self.assertEqual({}, self.node.raid_config) + + +class AgentRescueTestCase(db_base.DbTestCase): + + def setUp(self): + super(AgentRescueTestCase, self).setUp() + for iface in drivers_base.ALL_INTERFACES: + impl = 'fake' + if iface == 'network': + impl = 'flat' + if iface == 'rescue': + impl = 'agent' + config_kwarg = {'enabled_%s_interfaces' % iface: [impl], + 'default_%s_interface' % iface: impl} + self.config(**config_kwarg) + self.config(enabled_hardware_types=['fake-hardware']) + instance_info = INSTANCE_INFO + instance_info.update({'rescue_password': 'password'}) + driver_info = DRIVER_INFO + driver_info.update({'rescue_ramdisk': 'my_ramdisk', + 'rescue_kernel': 'my_kernel'}) + n = { + 'driver': 'fake-hardware', + 'instance_info': instance_info, + 'driver_info': driver_info, + 'driver_internal_info': DRIVER_INTERNAL_INFO, + } + self.node = object_utils.create_test_node(self.context, **n) + + @mock.patch.object(flat_network.FlatNetwork, 'add_rescuing_network', + spec_set=True, autospec=True) + @mock.patch.object(flat_network.FlatNetwork, 'unconfigure_tenant_networks', + spec_set=True, autospec=True) + @mock.patch.object(fake.FakeBoot, 'prepare_ramdisk', autospec=True) + @mock.patch.object(fake.FakeBoot, 'clean_up_instance', autospec=True) + @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) + @mock.patch.object(manager_utils, 'node_power_action', autospec=True) + def test_agent_rescue(self, mock_node_power_action, mock_build_agent_opts, + mock_clean_up_instance, mock_prepare_ramdisk, + mock_unconf_tenant_net, mock_add_rescue_net): + self.config(manage_agent_boot=True, group='agent') + mock_build_agent_opts.return_value = {'ipa-api-url': 'fake-api'} + with task_manager.acquire(self.context, self.node.uuid) as task: + result = task.driver.rescue.rescue(task) + mock_node_power_action.assert_has_calls( + [mock.call(task, states.POWER_OFF), + mock.call(task, states.POWER_ON)]) + mock_clean_up_instance.assert_called_once_with(mock.ANY, task) + mock_unconf_tenant_net.assert_called_once_with(mock.ANY, task) + mock_add_rescue_net.assert_called_once_with(mock.ANY, task) + mock_build_agent_opts.assert_called_once_with(task.node) + mock_prepare_ramdisk.assert_called_once_with( + mock.ANY, task, {'ipa-api-url': 'fake-api'}, mode='rescue') + self.assertEqual(states.RESCUEWAIT, result) + + @mock.patch.object(flat_network.FlatNetwork, 'add_rescuing_network', + spec_set=True, autospec=True) + @mock.patch.object(flat_network.FlatNetwork, 'unconfigure_tenant_networks', + spec_set=True, autospec=True) + @mock.patch.object(fake.FakeBoot, 'prepare_ramdisk', autospec=True) + @mock.patch.object(fake.FakeBoot, 'clean_up_instance', autospec=True) + @mock.patch.object(deploy_utils, 'build_agent_options', autospec=True) + @mock.patch.object(manager_utils, 'node_power_action', autospec=True) + def test_agent_rescue_no_manage_agent_boot(self, mock_node_power_action, + mock_build_agent_opts, + mock_clean_up_instance, + mock_prepare_ramdisk, + mock_unconf_tenant_net, + mock_add_rescue_net): + self.config(manage_agent_boot=False, group='agent') + with task_manager.acquire(self.context, self.node.uuid) as task: + result = task.driver.rescue.rescue(task) + mock_node_power_action.assert_has_calls( + [mock.call(task, states.POWER_OFF), + mock.call(task, states.POWER_ON)]) + mock_clean_up_instance.assert_called_once_with(mock.ANY, task) + mock_unconf_tenant_net.assert_called_once_with(mock.ANY, task) + mock_add_rescue_net.assert_called_once_with(mock.ANY, task) + self.assertFalse(mock_build_agent_opts.called) + self.assertFalse(mock_prepare_ramdisk.called) + self.assertEqual(states.RESCUEWAIT, result) + + @mock.patch.object(flat_network.FlatNetwork, 'remove_rescuing_network', + spec_set=True, autospec=True) + @mock.patch.object(flat_network.FlatNetwork, 'configure_tenant_networks', + spec_set=True, autospec=True) + @mock.patch.object(fake.FakeBoot, 'prepare_instance', autospec=True) + @mock.patch.object(fake.FakeBoot, 'clean_up_ramdisk', autospec=True) + @mock.patch.object(manager_utils, 'node_power_action', autospec=True) + def test_agent_unrescue(self, mock_node_power_action, mock_clean_ramdisk, + mock_prepare_instance, mock_conf_tenant_net, + mock_remove_rescue_net): + """Test unrescue in case where boot driver prepares instance reboot.""" + self.config(manage_agent_boot=True, group='agent') + with task_manager.acquire(self.context, self.node.uuid) as task: + result = task.driver.rescue.unrescue(task) + mock_node_power_action.assert_has_calls( + [mock.call(task, states.POWER_OFF), + mock.call(task, states.POWER_ON)]) + mock_clean_ramdisk.assert_called_once_with( + mock.ANY, task, mode='rescue') + mock_remove_rescue_net.assert_called_once_with(mock.ANY, task) + mock_conf_tenant_net.assert_called_once_with(mock.ANY, task) + mock_prepare_instance.assert_called_once_with(mock.ANY, task) + self.assertEqual(states.ACTIVE, result) + + @mock.patch.object(flat_network.FlatNetwork, 'remove_rescuing_network', + spec_set=True, autospec=True) + @mock.patch.object(flat_network.FlatNetwork, 'configure_tenant_networks', + spec_set=True, autospec=True) + @mock.patch.object(fake.FakeBoot, 'prepare_instance', autospec=True) + @mock.patch.object(fake.FakeBoot, 'clean_up_ramdisk', autospec=True) + @mock.patch.object(manager_utils, 'node_power_action', autospec=True) + def test_agent_unrescue_no_manage_agent_boot(self, mock_node_power_action, + mock_clean_ramdisk, + mock_prepare_instance, + mock_conf_tenant_net, + mock_remove_rescue_net): + """Test unrescue in case where boot driver prepares instance reboot.""" + self.config(manage_agent_boot=False, group='agent') + with task_manager.acquire(self.context, self.node.uuid) as task: + result = task.driver.rescue.unrescue(task) + mock_node_power_action.assert_has_calls( + [mock.call(task, states.POWER_OFF), + mock.call(task, states.POWER_ON)]) + self.assertFalse(mock_clean_ramdisk.called) + mock_remove_rescue_net.assert_called_once_with(mock.ANY, task) + mock_conf_tenant_net.assert_called_once_with(mock.ANY, task) + mock_prepare_instance.assert_called_once_with(mock.ANY, task) + self.assertEqual(states.ACTIVE, result) + + @mock.patch.object(neutron_common, 'validate_network', autospec=True) + @mock.patch.object(fake.FakeBoot, 'validate', autospec=True) + def test_agent_rescue_validate(self, mock_boot_validate, + mock_validate_network): + with task_manager.acquire(self.context, self.node.uuid) as task: + task.driver.rescue.validate(task) + self.assertFalse(mock_validate_network.called) + mock_boot_validate.assert_called_once_with(mock.ANY, task) + + @mock.patch.object(neutron_common, 'validate_network', autospec=True) + @mock.patch.object(fake.FakeBoot, 'validate', autospec=True) + def test_agent_rescue_validate_neutron_net(self, mock_boot_validate, + mock_validate_network): + self.config(enabled_network_interfaces=['neutron']) + self.node.network_interface = 'neutron' + self.node.save() + with task_manager.acquire(self.context, self.node.uuid) as task: + task.driver.rescue.validate(task) + mock_validate_network.assert_called_once_with( + CONF.neutron.rescuing_network, 'rescuing network', + context=task.context) + mock_boot_validate.assert_called_once_with(mock.ANY, task) + + @mock.patch.object(neutron_common, 'validate_network', autospec=True) + @mock.patch.object(fake.FakeBoot, 'validate', autospec=True) + def test_agent_rescue_validate_no_manage_agent(self, mock_boot_validate, + mock_validate_network): + # If ironic's not managing booting of ramdisks, we don't set up PXE for + # the ramdisk/kernel, so validation can pass without this info + self.config(manage_agent_boot=False, group='agent') + driver_info = self.node.driver_info + del driver_info['rescue_ramdisk'] + del driver_info['rescue_kernel'] + self.node.driver_info = driver_info + self.node.save() + with task_manager.acquire(self.context, self.node.uuid) as task: + task.driver.rescue.validate(task) + self.assertFalse(mock_validate_network.called) + self.assertFalse(mock_boot_validate.called) + + @mock.patch.object(neutron_common, 'validate_network', autospec=True) + @mock.patch.object(fake.FakeBoot, 'validate', autospec=True) + def test_agent_rescue_validate_fails_no_rescue_ramdisk( + self, mock_boot_validate, mock_validate_network): + driver_info = self.node.driver_info + del driver_info['rescue_ramdisk'] + self.node.driver_info = driver_info + self.node.save() + with task_manager.acquire(self.context, self.node.uuid) as task: + self.assertRaises(exception.MissingParameterValue, + task.driver.rescue.validate, task) + self.assertFalse(mock_validate_network.called) + mock_boot_validate.assert_called_once_with(mock.ANY, task) + + @mock.patch.object(neutron_common, 'validate_network', autospec=True) + @mock.patch.object(fake.FakeBoot, 'validate', autospec=True) + def test_agent_rescue_validate_fails_no_rescue_kernel( + self, mock_boot_validate, mock_validate_network): + driver_info = self.node.driver_info + del driver_info['rescue_kernel'] + self.node.driver_info = driver_info + self.node.save() + with task_manager.acquire(self.context, self.node.uuid) as task: + self.assertRaises(exception.MissingParameterValue, + task.driver.rescue.validate, task) + self.assertFalse(mock_validate_network.called) + mock_boot_validate.assert_called_once_with(mock.ANY, task) + + @mock.patch.object(neutron_common, 'validate_network', autospec=True) + @mock.patch.object(fake.FakeBoot, 'validate', autospec=True) + def test_agent_rescue_validate_fails_no_rescue_password( + self, mock_boot_validate, mock_validate_network): + instance_info = self.node.instance_info + del instance_info['rescue_password'] + self.node.instance_info = instance_info + self.node.save() + with task_manager.acquire(self.context, self.node.uuid) as task: + self.assertRaises(exception.MissingParameterValue, + task.driver.rescue.validate, task) + self.assertFalse(mock_validate_network.called) + mock_boot_validate.assert_called_once_with(mock.ANY, task) + + @mock.patch.object(neutron_common, 'validate_network', autospec=True) + @mock.patch.object(fake.FakeBoot, 'validate', autospec=True) + def test_agent_rescue_validate_fails_empty_rescue_password( + self, mock_boot_validate, mock_validate_network): + instance_info = self.node.instance_info + instance_info['rescue_password'] = " " + self.node.instance_info = instance_info + self.node.save() + with task_manager.acquire(self.context, self.node.uuid) as task: + self.assertRaises(exception.InvalidParameterValue, + task.driver.rescue.validate, task) + self.assertFalse(mock_validate_network.called) + mock_boot_validate.assert_called_once_with(mock.ANY, task) + + @mock.patch.object(neutron_common, 'validate_network', autospec=True) + @mock.patch.object(reflection, 'get_signature', autospec=True) + @mock.patch.object(fake.FakeBoot, 'validate', autospec=True) + def test_agent_rescue_validate_incompat_exc(self, mock_boot_validate, + mock_get_signature, + mock_validate_network): + mock_get_signature.return_value.parameters = ['task'] + with task_manager.acquire(self.context, self.node.uuid) as task: + self.assertRaises(exception.IncompatibleInterface, + task.driver.rescue.validate, task) + self.assertFalse(mock_validate_network.called) + self.assertFalse(mock_boot_validate.called) + + @mock.patch.object(flat_network.FlatNetwork, 'remove_rescuing_network', + spec_set=True, autospec=True) + @mock.patch.object(fake.FakeBoot, 'clean_up_ramdisk', autospec=True) + def test_agent_rescue_clean_up(self, mock_clean_ramdisk, + mock_remove_rescue_net): + with task_manager.acquire(self.context, self.node.uuid) as task: + task.driver.rescue.clean_up(task) + self.assertNotIn('rescue_password', task.node.instance_info) + mock_clean_ramdisk.assert_called_once_with( + mock.ANY, task, mode='rescue') + mock_remove_rescue_net.assert_called_once_with(mock.ANY, task) + + @mock.patch.object(flat_network.FlatNetwork, 'remove_rescuing_network', + spec_set=True, autospec=True) + @mock.patch.object(fake.FakeBoot, 'clean_up_ramdisk', autospec=True) + def test_agent_rescue_clean_up_no_manage_boot(self, mock_clean_ramdisk, + mock_remove_rescue_net): + self.config(manage_agent_boot=False, group='agent') + with task_manager.acquire(self.context, self.node.uuid) as task: + task.driver.rescue.clean_up(task) + self.assertNotIn('rescue_password', task.node.instance_info) + self.assertFalse(mock_clean_ramdisk.called) + mock_remove_rescue_net.assert_called_once_with(mock.ANY, task) diff --git a/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py b/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py index b7787f9eb..e56d987a1 100644 --- a/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py +++ b/ironic/tests/unit/drivers/modules/test_agent_base_vendor.py @@ -24,14 +24,16 @@ from ironic.common import exception from ironic.common import states from ironic.conductor import task_manager from ironic.conductor import utils as manager_utils +from ironic.drivers import base as drivers_base +from ironic.drivers.modules import agent from ironic.drivers.modules import agent_base_vendor from ironic.drivers.modules import agent_client from ironic.drivers.modules import deploy_utils from ironic.drivers.modules import fake +from ironic.drivers.modules.network import flat as flat_network from ironic.drivers.modules import pxe from ironic.drivers import utils as driver_utils from ironic import objects -from ironic.tests.unit.conductor import mgr_utils from ironic.tests.unit.db import base as db_base from ironic.tests.unit.db import utils as db_utils from ironic.tests.unit.objects import utils as object_utils @@ -47,10 +49,23 @@ class AgentDeployMixinBaseTest(db_base.DbTestCase): def setUp(self): super(AgentDeployMixinBaseTest, self).setUp() - mgr_utils.mock_the_extension_manager(driver="fake_agent") + for iface in drivers_base.ALL_INTERFACES: + impl = 'fake' + if iface == 'deploy': + impl = 'direct' + if iface == 'boot': + impl = 'pxe' + if iface == 'rescue': + impl = 'agent' + if iface == 'network': + continue + config_kwarg = {'enabled_%s_interfaces' % iface: [impl], + 'default_%s_interface' % iface: impl} + self.config(**config_kwarg) + self.config(enabled_hardware_types=['fake-hardware']) self.deploy = agent_base_vendor.AgentDeployMixin() n = { - 'driver': 'fake_agent', + 'driver': 'fake-hardware', 'instance_info': INSTANCE_INFO, 'driver_info': DRIVER_INFO, 'driver_internal_info': DRIVER_INTERNAL_INFO, @@ -132,9 +147,9 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest): failed_mock.assert_called_once_with( task, mock.ANY, collect_logs=True) log_mock.assert_called_once_with( - 'Asynchronous exception for node ' - '1be26c0b-03f2-4d2e-ae87-c02d7f33c123: Failed checking if deploy ' - 'is done. Exception: LlamaException') + 'Asynchronous exception: Failed checking if deploy is done. ' + 'Exception: LlamaException for node %(node)s', + {'node': '1be26c0b-03f2-4d2e-ae87-c02d7f33c123'}) @mock.patch.object(agent_base_vendor.HeartbeatMixin, 'deploy_has_started', autospec=True) @@ -164,9 +179,9 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest): # deploy_utils.set_failed_state anymore self.assertFalse(failed_mock.called) log_mock.assert_called_once_with( - 'Asynchronous exception for node ' - '1be26c0b-03f2-4d2e-ae87-c02d7f33c123: Failed checking if deploy ' - 'is done. Exception: LlamaException') + 'Asynchronous exception: Failed checking if deploy is done. ' + 'Exception: LlamaException for node %(node)s', + {'node': '1be26c0b-03f2-4d2e-ae87-c02d7f33c123'}) @mock.patch.object(objects.node.Node, 'touch_provisioning', autospec=True) @mock.patch.object(agent_base_vendor.HeartbeatMixin, @@ -265,6 +280,34 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest): mock_continue.assert_called_once_with(mock.ANY, task) mock_handler.assert_called_once_with(task, mock.ANY) + @mock.patch.object(agent_base_vendor.HeartbeatMixin, '_finalize_rescue', + autospec=True) + def test_heartbeat_rescue(self, mock_finalize_rescue): + self.node.provision_state = states.RESCUEWAIT + self.node.save() + with task_manager.acquire( + self.context, self.node.uuid, shared=False) as task: + self.deploy.heartbeat(task, 'http://127.0.0.1:8080', '1.0.0') + + mock_finalize_rescue.assert_called_once_with(mock.ANY, task) + + @mock.patch.object(manager_utils, 'rescuing_error_handler') + @mock.patch.object(agent_base_vendor.HeartbeatMixin, '_finalize_rescue', + autospec=True) + def test_heartbeat_rescue_fails(self, mock_finalize, + mock_rescue_err_handler): + self.node.provision_state = states.RESCUEWAIT + self.node.save() + mock_finalize.side_effect = Exception('some failure') + with task_manager.acquire( + self.context, self.node.uuid, shared=False) as task: + self.deploy.heartbeat(task, 'http://127.0.0.1:8080', '1.0.0') + + mock_finalize.assert_called_once_with(mock.ANY, task) + mock_rescue_err_handler.assert_called_once_with( + task, 'Asynchronous exception: Node failed to perform ' + 'rescue operation. Exception: some failure for node') + @mock.patch.object(objects.node.Node, 'touch_provisioning', autospec=True) @mock.patch.object(agent_base_vendor.HeartbeatMixin, 'deploy_has_started', autospec=True) @@ -285,8 +328,100 @@ class HeartbeatMixinTest(AgentDeployMixinBaseTest): mock_touch.assert_called_once_with(mock.ANY) -class AgentDeployMixinTest(AgentDeployMixinBaseTest): +class AgentRescueTests(db_base.DbTestCase): + def setUp(self): + super(AgentRescueTests, self).setUp() + for iface in drivers_base.ALL_INTERFACES: + impl = 'fake' + if iface == 'deploy': + impl = 'direct' + if iface == 'boot': + impl = 'pxe' + if iface == 'rescue': + impl = 'agent' + if iface == 'network': + impl = 'flat' + config_kwarg = {'enabled_%s_interfaces' % iface: [impl], + 'default_%s_interface' % iface: impl} + self.config(**config_kwarg) + self.config(enabled_hardware_types=['fake-hardware']) + instance_info = INSTANCE_INFO + driver_info = DRIVER_INFO + self.deploy = agent_base_vendor.AgentDeployMixin() + n = { + 'driver': 'fake-hardware', + 'instance_info': instance_info, + 'driver_info': driver_info, + 'driver_internal_info': DRIVER_INTERNAL_INFO, + } + self.node = object_utils.create_test_node(self.context, **n) + + @mock.patch.object(flat_network.FlatNetwork, 'configure_tenant_networks', + spec_set=True, autospec=True) + @mock.patch.object(agent.AgentRescue, 'clean_up', + spec_set=True, autospec=True) + @mock.patch.object(agent_client.AgentClient, 'finalize_rescue', + spec=types.FunctionType) + def test__finalize_rescue(self, mock_finalize_rescue, + mock_clean_up, mock_conf_tenant_net): + node = self.node + node.provision_state = states.RESCUEWAIT + node.save() + mock_finalize_rescue.return_value = {'command_status': 'SUCCEEDED'} + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + task.process_event = mock.Mock() + self.deploy._finalize_rescue(task) + mock_finalize_rescue.assert_called_once_with(task.node) + task.process_event.assert_has_calls([mock.call('resume'), + mock.call('done')]) + mock_clean_up.assert_called_once_with(mock.ANY, task) + mock_conf_tenant_net.assert_called_once_with(mock.ANY, task) + + @mock.patch.object(agent_client.AgentClient, 'finalize_rescue', + spec=types.FunctionType) + def test__finalize_rescue_bad_command_result(self, mock_finalize_rescue): + node = self.node + node.provision_state = states.RESCUEWAIT + node.save() + mock_finalize_rescue.return_value = {'command_status': 'FAILED', + 'command_error': 'bad'} + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.assertRaises(exception.InstanceRescueFailure, + self.deploy._finalize_rescue, task) + mock_finalize_rescue.assert_called_once_with(task.node) + + @mock.patch.object(agent_client.AgentClient, 'finalize_rescue', + spec=types.FunctionType) + def test__finalize_rescue_exc(self, mock_finalize_rescue): + node = self.node + node.provision_state = states.RESCUEWAIT + node.save() + mock_finalize_rescue.side_effect = exception.IronicException("No pass") + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.assertRaises(exception.InstanceRescueFailure, + self.deploy._finalize_rescue, task) + mock_finalize_rescue.assert_called_once_with(task.node) + + @mock.patch.object(agent_client.AgentClient, 'finalize_rescue', + spec=types.FunctionType) + def test__finalize_rescue_missing_command_result(self, + mock_finalize_rescue): + node = self.node + node.provision_state = states.RESCUEWAIT + node.save() + mock_finalize_rescue.return_value = {} + with task_manager.acquire(self.context, self.node['uuid'], + shared=False) as task: + self.assertRaises(exception.InstanceRescueFailure, + self.deploy._finalize_rescue, task) + mock_finalize_rescue.assert_called_once_with(task.node) + + +class AgentDeployMixinTest(AgentDeployMixinBaseTest): @mock.patch.object(driver_utils, 'collect_ramdisk_logs', autospec=True) @mock.patch.object(time, 'sleep', lambda seconds: None) @mock.patch.object(manager_utils, 'node_power_action', autospec=True) diff --git a/ironic/tests/unit/drivers/modules/test_agent_client.py b/ironic/tests/unit/drivers/modules/test_agent_client.py index 3c8ca31fd..4683de129 100644 --- a/ironic/tests/unit/drivers/modules/test_agent_client.py +++ b/ironic/tests/unit/drivers/modules/test_agent_client.py @@ -284,3 +284,22 @@ class TestAgentClient(base.TestCase): self.client.sync(self.node) self.client._command.assert_called_once_with( node=self.node, method='standby.sync', params={}, wait=True) + + def test_finalize_rescue(self): + self.client._command = mock.MagicMock(spec_set=[]) + self.node.instance_info['rescue_password'] = 'password' + expected_params = { + 'rescue_password': 'password', + } + self.client.finalize_rescue(self.node) + self.client._command.assert_called_once_with( + node=self.node, method='rescue.finalize_rescue', + params=expected_params) + + def test_finalize_rescue_exc(self): + # node does not have 'rescue_password' set in its 'instance_info' + self.client._command = mock.MagicMock(spec_set=[]) + self.assertRaises(exception.IronicException, + self.client.finalize_rescue, + self.node) + self.assertFalse(self.client._command.called) diff --git a/ironic/tests/unit/drivers/modules/test_iscsi_deploy.py b/ironic/tests/unit/drivers/modules/test_iscsi_deploy.py index 15e3ecc89..ba3479352 100644 --- a/ironic/tests/unit/drivers/modules/test_iscsi_deploy.py +++ b/ironic/tests/unit/drivers/modules/test_iscsi_deploy.py @@ -996,7 +996,7 @@ class CleanUpFullFlowTestCase(db_base.DbTestCase): @mock.patch('ironic.common.dhcp_factory.DHCPFactory._set_dhcp_provider') @mock.patch('ironic.common.dhcp_factory.DHCPFactory.clean_dhcp') @mock.patch.object(pxe, '_get_instance_image_info', autospec=True) - @mock.patch.object(pxe, '_get_deploy_image_info', autospec=True) + @mock.patch.object(pxe, '_get_image_info', autospec=True) def test_clean_up_with_master(self, mock_get_deploy_image_info, mock_get_instance_image_info, clean_dhcp_mock, set_dhcp_provider_mock): @@ -1010,7 +1010,8 @@ class CleanUpFullFlowTestCase(db_base.DbTestCase): task.driver.deploy.clean_up(task) mock_get_instance_image_info.assert_called_with(task.node, task.context) - mock_get_deploy_image_info.assert_called_with(task.node) + mock_get_deploy_image_info.assert_called_with(task.node, + mode='deploy') set_dhcp_provider_mock.assert_called_once_with() clean_dhcp_mock.assert_called_once_with(task) for path in ([self.kernel_path, self.image_path, self.config_path] diff --git a/ironic/tests/unit/drivers/modules/test_pxe.py b/ironic/tests/unit/drivers/modules/test_pxe.py index 44ccf293e..77fe1e0f8 100644 --- a/ironic/tests/unit/drivers/modules/test_pxe.py +++ b/ironic/tests/unit/drivers/modules/test_pxe.py @@ -63,21 +63,43 @@ class PXEPrivateMethodsTestCase(db_base.DbTestCase): mgr_utils.mock_the_extension_manager(driver="fake_pxe") self.node = obj_utils.create_test_node(self.context, **n) + def _test__parse_driver_info_missing_kernel(self, mode='deploy'): + del self.node.driver_info['%s_kernel' % mode] + if mode == 'rescue': + self.node.provision_state = states.RESCUING + self.assertRaises(exception.MissingParameterValue, + pxe._parse_driver_info, self.node, mode=mode) + def test__parse_driver_info_missing_deploy_kernel(self): - del self.node.driver_info['deploy_kernel'] + self._test__parse_driver_info_missing_kernel() + + def test__parse_driver_info_missing_rescue_kernel(self): + self._test__parse_driver_info_missing_kernel(mode='rescue') + + def _test__parse_driver_info_missing_ramdisk(self, mode='deploy'): + del self.node.driver_info['%s_ramdisk' % mode] + if mode == 'rescue': + self.node.provision_state = states.RESCUING self.assertRaises(exception.MissingParameterValue, - pxe._parse_driver_info, self.node) + pxe._parse_driver_info, self.node, mode=mode) def test__parse_driver_info_missing_deploy_ramdisk(self): - del self.node.driver_info['deploy_ramdisk'] - self.assertRaises(exception.MissingParameterValue, - pxe._parse_driver_info, self.node) + self._test__parse_driver_info_missing_ramdisk() - def test__parse_driver_info(self): - expected_info = {'deploy_ramdisk': 'glance://deploy_ramdisk_uuid', - 'deploy_kernel': 'glance://deploy_kernel_uuid'} - image_info = pxe._parse_driver_info(self.node) - self.assertEqual(expected_info, image_info) + def test__parse_driver_info_missing_rescue_ramdisk(self): + self._test__parse_driver_info_missing_ramdisk(mode='rescue') + + def _test__parse_driver_info(self, mode='deploy'): + exp_info = {'%s_ramdisk' % mode: 'glance://%s_ramdisk_uuid' % mode, + '%s_kernel' % mode: 'glance://%s_kernel_uuid' % mode} + image_info = pxe._parse_driver_info(self.node, mode=mode) + self.assertEqual(exp_info, image_info) + + def test__parse_driver_info_deploy(self): + self._test__parse_driver_info() + + def test__parse_driver_info_rescue(self): + self._test__parse_driver_info(mode='rescue') def test__get_deploy_image_info(self): expected_info = {'deploy_ramdisk': @@ -90,18 +112,18 @@ class PXEPrivateMethodsTestCase(db_base.DbTestCase): os.path.join(CONF.pxe.tftp_root, self.node.uuid, 'deploy_kernel'))} - image_info = pxe._get_deploy_image_info(self.node) + image_info = pxe._get_image_info(self.node) self.assertEqual(expected_info, image_info) def test__get_deploy_image_info_missing_deploy_kernel(self): del self.node.driver_info['deploy_kernel'] self.assertRaises(exception.MissingParameterValue, - pxe._get_deploy_image_info, self.node) + pxe._get_image_info, self.node) def test__get_deploy_image_info_deploy_ramdisk(self): del self.node.driver_info['deploy_ramdisk'] self.assertRaises(exception.MissingParameterValue, - pxe._get_deploy_image_info, self.node) + pxe._get_image_info, self.node) @mock.patch.object(base_image_service.BaseImageService, '_show', autospec=True) @@ -168,7 +190,7 @@ class PXEPrivateMethodsTestCase(db_base.DbTestCase): @mock.patch('ironic.common.utils.render_template', autospec=True) def _test_build_pxe_config_options_pxe(self, render_mock, whle_dsk_img=False, - debug=False): + debug=False, mode='deploy'): self.config(debug=debug) self.config(pxe_append_params='test_param', group='pxe') # NOTE: right '/' should be removed from url string @@ -181,21 +203,24 @@ class PXEPrivateMethodsTestCase(db_base.DbTestCase): tftp_server = CONF.pxe.tftp_server - deploy_kernel = os.path.join(self.node.uuid, 'deploy_kernel') - deploy_ramdisk = os.path.join(self.node.uuid, 'deploy_ramdisk') + kernel_label = '%s_kernel' % mode + ramdisk_label = '%s_ramdisk' % mode + + pxe_kernel = os.path.join(self.node.uuid, kernel_label) + pxe_ramdisk = os.path.join(self.node.uuid, ramdisk_label) kernel = os.path.join(self.node.uuid, 'kernel') ramdisk = os.path.join(self.node.uuid, 'ramdisk') root_dir = CONF.pxe.tftp_root image_info = { - 'deploy_kernel': ('deploy_kernel', - os.path.join(root_dir, - self.node.uuid, - 'deploy_kernel')), - 'deploy_ramdisk': ('deploy_ramdisk', - os.path.join(root_dir, - self.node.uuid, - 'deploy_ramdisk')) + kernel_label: (kernel_label, + os.path.join(root_dir, + self.node.uuid, + kernel_label)), + ramdisk_label: (ramdisk_label, + os.path.join(root_dir, + self.node.uuid, + ramdisk_label)) } if (whle_dsk_img or @@ -219,15 +244,19 @@ class PXEPrivateMethodsTestCase(db_base.DbTestCase): expected_pxe_params += ' ipa-debug=1' expected_options = { - 'ari_path': ramdisk, - 'deployment_ari_path': deploy_ramdisk, + 'deployment_ari_path': pxe_ramdisk, 'pxe_append_params': expected_pxe_params, - 'aki_path': kernel, - 'deployment_aki_path': deploy_kernel, + 'deployment_aki_path': pxe_kernel, 'tftp_server': tftp_server, 'ipxe_timeout': 0, } + if mode == 'deploy': + expected_options.update({'ari_path': ramdisk, 'aki_path': kernel}) + elif mode == 'rescue': + self.node.provision_state = states.RESCUING + self.node.save() + with task_manager.acquire(self.context, self.node.uuid, shared=True) as task: options = pxe._build_pxe_config_options(task, image_info) @@ -239,6 +268,14 @@ class PXEPrivateMethodsTestCase(db_base.DbTestCase): def test__build_pxe_config_options_pxe_ipa_debug(self): self._test_build_pxe_config_options_pxe(debug=True) + def test__build_pxe_config_options_pxe_rescue(self): + del self.node.driver_internal_info['is_whole_disk_image'] + self._test_build_pxe_config_options_pxe(mode='rescue') + + def test__build_pxe_config_options_ipa_debug_rescue(self): + del self.node.driver_internal_info['is_whole_disk_image'] + self._test_build_pxe_config_options_pxe(debug=True, mode='rescue') + def test__build_pxe_config_options_pxe_local_boot(self): del self.node.driver_internal_info['is_whole_disk_image'] i_info = self.node.instance_info @@ -289,7 +326,8 @@ class PXEPrivateMethodsTestCase(db_base.DbTestCase): ipxe_timeout=0, ipxe_use_swift=False, debug=False, - boot_from_volume=False): + boot_from_volume=False, + mode='deploy'): self.config(debug=debug) self.config(pxe_append_params='test_param', group='pxe') # NOTE: right '/' should be removed from url string @@ -307,37 +345,41 @@ class PXEPrivateMethodsTestCase(db_base.DbTestCase): http_url = 'http://192.1.2.3:1234' self.config(ipxe_enabled=True, group='pxe') self.config(http_url=http_url, group='deploy') + + kernel_label = '%s_kernel' % mode + ramdisk_label = '%s_ramdisk' % mode + if ipxe_use_swift: self.config(ipxe_use_swift=True, group='pxe') glance = mock.Mock() glance_mock.return_value = glance glance.swift_temp_url.side_effect = [ - deploy_kernel, deploy_ramdisk] = [ + pxe_kernel, pxe_ramdisk] = [ 'swift_kernel', 'swift_ramdisk'] image_info = { - 'deploy_kernel': (uuidutils.generate_uuid(), - os.path.join(root_dir, - self.node.uuid, - 'deploy_kernel')), - 'deploy_ramdisk': (uuidutils.generate_uuid(), - os.path.join(root_dir, - self.node.uuid, - 'deploy_ramdisk')) + kernel_label: (uuidutils.generate_uuid(), + os.path.join(root_dir, + self.node.uuid, + kernel_label)), + ramdisk_label: (uuidutils.generate_uuid(), + os.path.join(root_dir, + self.node.uuid, + ramdisk_label)) } else: - deploy_kernel = os.path.join(http_url, self.node.uuid, - 'deploy_kernel') - deploy_ramdisk = os.path.join(http_url, self.node.uuid, - 'deploy_ramdisk') + pxe_kernel = os.path.join(http_url, self.node.uuid, + kernel_label) + pxe_ramdisk = os.path.join(http_url, self.node.uuid, + ramdisk_label) image_info = { - 'deploy_kernel': ('deploy_kernel', - os.path.join(root_dir, - self.node.uuid, - 'deploy_kernel')), - 'deploy_ramdisk': ('deploy_ramdisk', - os.path.join(root_dir, - self.node.uuid, - 'deploy_ramdisk')) + kernel_label: (kernel_label, + os.path.join(root_dir, + self.node.uuid, + kernel_label)), + ramdisk_label: (ramdisk_label, + os.path.join(root_dir, + self.node.uuid, + ramdisk_label)) } kernel = os.path.join(http_url, self.node.uuid, 'kernel') @@ -365,14 +407,17 @@ class PXEPrivateMethodsTestCase(db_base.DbTestCase): expected_pxe_params += ' ipa-debug=1' expected_options = { - 'ari_path': ramdisk, - 'deployment_ari_path': deploy_ramdisk, + 'deployment_ari_path': pxe_ramdisk, 'pxe_append_params': expected_pxe_params, - 'aki_path': kernel, - 'deployment_aki_path': deploy_kernel, + 'deployment_aki_path': pxe_kernel, 'tftp_server': tftp_server, 'ipxe_timeout': ipxe_timeout_in_ms, } + if mode == 'deploy': + expected_options.update({'ari_path': ramdisk, 'aki_path': kernel}) + elif mode == 'rescue': + self.node.provision_state = states.RESCUING + self.node.save() if boot_from_volume: expected_options.update({ @@ -549,6 +594,17 @@ class PXEPrivateMethodsTestCase(db_base.DbTestCase): options = pxe._get_volume_pxe_options(task) self.assertEqual([], options['iscsi_volumes']) + def test__build_pxe_config_options_ipxe_rescue(self): + self._test_build_pxe_config_options_ipxe(mode='rescue') + + def test__build_pxe_config_options_ipxe_rescue_swift(self): + self._test_build_pxe_config_options_ipxe(mode='rescue', + ipxe_use_swift=True) + + def test__build_pxe_config_options_ipxe_rescue_timeout(self): + self._test_build_pxe_config_options_ipxe(mode='rescue', + ipxe_timeout=120) + @mock.patch.object(deploy_utils, 'fetch_images', autospec=True) def test__cache_tftp_images_master_path(self, mock_fetch_image): temp_dir = tempfile.mkdtemp() @@ -823,7 +879,7 @@ class PXEBootTestCase(db_base.DbTestCase): @mock.patch.object(manager_utils, 'node_set_boot_device', autospec=True) @mock.patch.object(dhcp_factory, 'DHCPFactory') @mock.patch.object(pxe, '_get_instance_image_info', autospec=True) - @mock.patch.object(pxe, '_get_deploy_image_info', autospec=True) + @mock.patch.object(pxe, '_get_image_info', autospec=True) @mock.patch.object(pxe, '_cache_ramdisk_kernel', autospec=True) @mock.patch.object(pxe, '_build_pxe_config_options', autospec=True) @mock.patch.object(pxe_utils, 'create_pxe_config', autospec=True) @@ -836,9 +892,13 @@ class PXEBootTestCase(db_base.DbTestCase): uefi=False, cleaning=False, ipxe_use_swift=False, - whole_disk_image=False): + whole_disk_image=False, + mode='deploy'): mock_build_pxe.return_value = {} - mock_deploy_img_info.return_value = {'deploy_kernel': 'a'} + kernel_label = '%s_kernel' % mode + ramdisk_label = '%s_ramdisk' % mode + mock_deploy_img_info.return_value = {kernel_label: 'a', + ramdisk_label: 'r'} if whole_disk_image: mock_instance_img_info.return_value = {} else: @@ -850,11 +910,16 @@ class PXEBootTestCase(db_base.DbTestCase): driver_internal_info = self.node.driver_internal_info driver_internal_info['is_whole_disk_image'] = whole_disk_image self.node.driver_internal_info = driver_internal_info + if mode == 'rescue': + mock_deploy_img_info.return_value = { + 'rescue_kernel': 'a', + 'rescue_ramdisk': 'r'} + self.node.provision_state = states.RESCUING self.node.save() with task_manager.acquire(self.context, self.node.uuid) as task: dhcp_opts = pxe_utils.dhcp_options_for_instance(task) - task.driver.boot.prepare_ramdisk(task, {'foo': 'bar'}) - mock_deploy_img_info.assert_called_once_with(task.node) + task.driver.boot.prepare_ramdisk(task, {'foo': 'bar'}, mode=mode) + mock_deploy_img_info.assert_called_once_with(task.node, mode=mode) provider_mock.update_dhcp.assert_called_once_with(task, dhcp_opts) set_boot_device_mock.assert_called_once_with(task, boot_devices.PXE, @@ -868,16 +933,21 @@ class PXEBootTestCase(db_base.DbTestCase): {'kernel': 'b'}) mock_instance_img_info.assert_called_once_with(task.node, self.context) - elif cleaning is False: + elif not cleaning and mode == 'deploy': mock_cache_r_k.assert_called_once_with( self.context, task.node, - {'deploy_kernel': 'a', 'kernel': 'b'}) + {'deploy_kernel': 'a', 'deploy_ramdisk': 'r', + 'kernel': 'b'}) mock_instance_img_info.assert_called_once_with(task.node, self.context) - else: - mock_cache_r_k.assert_called_once_with( - self.context, task.node, - {'deploy_kernel': 'a'}) + elif mode == 'deploy': + mock_cache_r_k.assert_called_once_with( + self.context, task.node, + {'deploy_kernel': 'a', 'deploy_ramdisk': 'r'}) + elif mode == 'rescue': + mock_cache_r_k.assert_called_once_with( + self.context, task.node, + {'rescue_kernel': 'a', 'rescue_ramdisk': 'r'}) if uefi: mock_pxe_config.assert_called_once_with( task, {'foo': 'bar'}, CONF.pxe.uefi_pxe_config_template) @@ -890,6 +960,11 @@ class PXEBootTestCase(db_base.DbTestCase): self.node.save() self._test_prepare_ramdisk() + def test_prepare_ramdisk_rescue(self): + self.node.provision_state = states.RESCUING + self.node.save() + self._test_prepare_ramdisk(mode='rescue') + def test_prepare_ramdisk_uefi(self): self.node.provision_state = states.DEPLOYING self.node.save() @@ -992,16 +1067,24 @@ class PXEBootTestCase(db_base.DbTestCase): self._test_prepare_ramdisk(cleaning=True) @mock.patch.object(pxe, '_clean_up_pxe_env', autospec=True) - @mock.patch.object(pxe, '_get_deploy_image_info', autospec=True) - def test_clean_up_ramdisk(self, get_deploy_image_info_mock, - clean_up_pxe_env_mock): + @mock.patch.object(pxe, '_get_image_info', autospec=True) + def _test_clean_up_ramdisk(self, get_image_info_mock, + clean_up_pxe_env_mock, mode='deploy'): with task_manager.acquire(self.context, self.node.uuid) as task: - image_info = {'deploy_kernel': ['', '/path/to/deploy_kernel'], - 'deploy_ramdisk': ['', '/path/to/deploy_ramdisk']} - get_deploy_image_info_mock.return_value = image_info - task.driver.boot.clean_up_ramdisk(task) + kernel_label = '%s_kernel' % mode + ramdisk_label = '%s_ramdisk' % mode + image_info = {kernel_label: ['', '/path/to/' + kernel_label], + ramdisk_label: ['', '/path/to/' + ramdisk_label]} + get_image_info_mock.return_value = image_info + task.driver.boot.clean_up_ramdisk(task, mode=mode) clean_up_pxe_env_mock.assert_called_once_with(task, image_info) - get_deploy_image_info_mock.assert_called_once_with(task.node) + get_image_info_mock.assert_called_once_with(task.node, mode=mode) + + def test_clean_up_ramdisk(self): + self._test_clean_up_ramdisk() + + def test_clean_up_ramdisk_rescue(self): + self._test_clean_up_ramdisk(mode='rescue') @mock.patch.object(manager_utils, 'node_set_boot_device', autospec=True) @mock.patch.object(deploy_utils, 'switch_pxe_config', autospec=True) diff --git a/ironic/tests/unit/drivers/test_ipmi.py b/ironic/tests/unit/drivers/test_ipmi.py index b261b50c4..84bf0dce5 100644 --- a/ironic/tests/unit/drivers/test_ipmi.py +++ b/ironic/tests/unit/drivers/test_ipmi.py @@ -61,6 +61,9 @@ class IPMIHardwareTestCase(db_base.DbTestCase): self.assertIsInstance( task.driver.storage, kwargs.get('storage', noop_storage.NoopStorage)) + self.assertIsInstance( + task.driver.rescue, + kwargs.get('rescue', noop.NoRescue)) def test_default_interfaces(self): node = obj_utils.create_test_node(self.context, driver='ipmi') @@ -92,6 +95,14 @@ class IPMIHardwareTestCase(db_base.DbTestCase): with task_manager.acquire(self.context, node.id) as task: self._validate_interfaces(task, storage=cinder.CinderStorage) + def test_override_with_agent_rescue(self): + self.config(enabled_rescue_interfaces=['agent']) + node = obj_utils.create_test_node( + self.context, driver='ipmi', + rescue_interface='agent') + with task_manager.acquire(self.context, node.id) as task: + self._validate_interfaces(task, rescue=agent.AgentRescue) + class IPMIClassicDriversTestCase(testtools.TestCase): @@ -154,6 +154,7 @@ ironic.hardware.interfaces.raid = no-raid = ironic.drivers.modules.noop:NoRAID ironic.hardware.interfaces.rescue = + agent = ironic.drivers.modules.agent:AgentRescue fake = ironic.drivers.modules.fake:FakeRescue no-rescue = ironic.drivers.modules.noop:NoRescue |