diff options
author | Jenkins <jenkins@review.openstack.org> | 2015-09-23 19:06:47 +0000 |
---|---|---|
committer | Gerrit Code Review <review@openstack.org> | 2015-09-23 19:06:47 +0000 |
commit | 9dafaf4929af838a14df41c59b9c413c64479b55 (patch) | |
tree | 179a31dd49c26361509cd5f7a80dc4c879d50c87 | |
parent | f4a1e2e0a3a6262cbc8d95045f49a19cf45f0e4a (diff) | |
parent | f7f15f50dd54887dc57c9c0175bcc26953f28fa8 (diff) | |
download | trove-9dafaf4929af838a14df41c59b9c413c64479b55.tar.gz |
Merge "Fix Postgres services management"
-rw-r--r-- | trove/guestagent/datastore/experimental/postgresql/service/process.py | 82 | ||||
-rw-r--r-- | trove/guestagent/datastore/service.py | 136 | ||||
-rw-r--r-- | trove/tests/unittests/guestagent/test_dbaas.py | 173 |
3 files changed, 322 insertions, 69 deletions
diff --git a/trove/guestagent/datastore/experimental/postgresql/service/process.py b/trove/guestagent/datastore/experimental/postgresql/service/process.py index 4495895d..a4fda138 100644 --- a/trove/guestagent/datastore/experimental/postgresql/service/process.py +++ b/trove/guestagent/datastore/experimental/postgresql/service/process.py @@ -16,83 +16,27 @@ from oslo_log import log as logging from trove.common import cfg -from trove.common import utils -from trove.guestagent.common import operating_system from trove.guestagent.datastore.experimental.postgresql.service.status import ( PgSqlAppStatus) LOG = logging.getLogger(__name__) CONF = cfg.CONF - -PGSQL_SERVICE_CANDIDATES = ("postgresql",) +PGSQL_SERVICE_CANDIDATES = ["postgresql"] class PgSqlProcess(object): """Mixin that manages the PgSql process.""" - def start_db(self, context): - self._enable_pgsql_on_boot() - """Start the PgSql service.""" - cmd = operating_system.service_discovery(PGSQL_SERVICE_CANDIDATES) - LOG.info( - _("{guest_id}: Starting database engine with command ({command}).") - .format( - guest_id=CONF.guest_id, - command=cmd['cmd_start'], - ) - ) - utils.execute_with_timeout( - *cmd['cmd_start'].split(), - timeout=30 - ) - - def _enable_pgsql_on_boot(self): - try: - pgsql_service = operating_system.service_discovery( - PGSQL_SERVICE_CANDIDATES) - utils.execute_with_timeout(pgsql_service['cmd_enable'], - shell=True) - except KeyError: - LOG.exception(_("Error enabling PostgreSQL start on boot.")) - raise RuntimeError("Service is not discovered.") - - def _disable_pgsql_on_boot(self): - try: - pgsql_service = operating_system.service_discovery( - PGSQL_SERVICE_CANDIDATES) - utils.execute_with_timeout(pgsql_service['cmd_disable'], - shell=True) - except KeyError: - LOG.exception(_("Error disabling PostgreSQL start on boot.")) - raise RuntimeError("Service is not discovered.") - - def stop_db(self, context, do_not_start_on_reboot=False): - """Stop the PgSql service.""" - if do_not_start_on_reboot: - self._disable_pgsql_on_boot() - cmd = operating_system.service_discovery(PGSQL_SERVICE_CANDIDATES) - LOG.info( - _("{guest_id}: Stopping database engine with command ({command}).") - .format( - guest_id=CONF.guest_id, - command=cmd['cmd_stop'], - ) - ) - utils.execute_with_timeout( - *cmd['cmd_stop'].split(), - timeout=30 - ) - def restart(self, context): - """Restart the PgSql service.""" - LOG.info( - _("{guest_id}: Restarting database engine.").format( - guest_id=CONF.guest_id, - ) - ) - try: - PgSqlAppStatus.get().begin_restart() - self.stop_db(context) - self.start_db(context) - finally: - PgSqlAppStatus.get().end_install_or_restart() + PgSqlAppStatus.get().restart_db_service( + PGSQL_SERVICE_CANDIDATES, CONF.state_change_wait_time) + + def start_db(self, context, enable_on_boot=True, update_db=False): + PgSqlAppStatus.get().start_db_service( + PGSQL_SERVICE_CANDIDATES, CONF.state_change_wait_time, + enable_on_boot=enable_on_boot, update_db=update_db) + + def stop_db(self, context, do_not_start_on_reboot=False, update_db=False): + PgSqlAppStatus.get().stop_db_service( + PGSQL_SERVICE_CANDIDATES, CONF.state_change_wait_time, + disable_on_boot=do_not_start_on_reboot, update_db=update_db) diff --git a/trove/guestagent/datastore/service.py b/trove/guestagent/datastore/service.py index 630a502c..a1a5d75b 100644 --- a/trove/guestagent/datastore/service.py +++ b/trove/guestagent/datastore/service.py @@ -23,6 +23,7 @@ from trove.common import context as trove_context from trove.common.i18n import _ from trove.common import instance from trove.conductor import api as conductor_api +from trove.guestagent.common import operating_system from trove.guestagent.common import timeutils LOG = logging.getLogger(__name__) @@ -141,6 +142,135 @@ class BaseDbStatus(object): "for now we'll skip determining the status of DB on " "this instance.")) + def restart_db_service(self, service_candidates, timeout): + """Restart the database. + Do not change the service auto-start setting. + Disable the Trove instance heartbeat updates during the restart. + + 1. Stop the database service. + 2. Wait for the database to shutdown. + 3. Start the database service. + 4. Wait for the database to start running. + + :param service_candidates: List of possible system service names. + :type service_candidates: list + + :param timeout: Wait timeout in seconds. + :type timeout: integer + + :raises: :class:`RuntimeError` on failure. + """ + try: + self.begin_restart() + self.stop_db_service(service_candidates, timeout, + disable_on_boot=False, update_db=False) + self.start_db_service(service_candidates, timeout, + enable_on_boot=False, update_db=False) + except Exception as e: + LOG.exception(e) + raise RuntimeError(_("Database restart failed.")) + finally: + self.end_install_or_restart() + + def start_db_service(self, service_candidates, timeout, + enable_on_boot=True, update_db=False): + """Start the database service and wait for the database to become + available. + The service auto-start will be updated only if the service command + succeeds. + + :param service_candidates: List of possible system service names. + :type service_candidates: list + + :param timeout: Wait timeout in seconds. + :type timeout: integer + + :param enable_on_boot: Enable service auto-start. + The auto-start setting will be updated + only if the service command succeeds. + :type enable_on_boot: boolean + + :param update_db: Suppress the Trove instance heartbeat. + :type update_db: boolean + + :raises: :class:`RuntimeError` on failure. + """ + LOG.info(_("Starting database service.")) + operating_system.start_service(service_candidates) + + LOG.debug("Waiting for database to start up.") + if not self._wait_for_database_service_status( + instance.ServiceStatuses.RUNNING, timeout, update_db): + raise RuntimeError(_("Database failed to start.")) + + LOG.info(_("Database has started successfully.")) + + if enable_on_boot: + LOG.info(_("Enable service auto-start on boot.")) + operating_system.enable_service_on_boot(service_candidates) + + def stop_db_service(self, service_candidates, timeout, + disable_on_boot=False, update_db=False): + """Stop the database service and wait for the database to shutdown. + + :param service_candidates: List of possible system service names. + :type service_candidates: list + + :param timeout: Wait timeout in seconds. + :type timeout: integer + + :param disable_on_boot: Disable service auto-start. + The auto-start setting will be updated + only if the service command succeeds. + :type disable_on_boot: boolean + + :param update_db: Suppress the Trove instance heartbeat. + :type update_db: boolean + + :raises: :class:`RuntimeError` on failure. + """ + LOG.info(_("Stopping database service.")) + operating_system.stop_service(service_candidates) + + LOG.debug("Waiting for database to shutdown.") + if not self._wait_for_database_service_status( + instance.ServiceStatuses.SHUTDOWN, timeout, update_db): + raise RuntimeError(_("Database failed to stop.")) + + LOG.info(_("Database has stopped successfully.")) + + if disable_on_boot: + LOG.info(_("Disable service auto-start on boot.")) + operating_system.disable_service_on_boot(service_candidates) + + def _wait_for_database_service_status(self, status, timeout, update_db): + """Wait for the given database status. + + :param status: The status to wait for. + :type status: BaseDbStatus + + :param timeout: Wait timeout in seconds. + :type timeout: integer + + :param update_db: Suppress the Trove instance heartbeat. + :type update_db: boolean + + :returns: True on success, False otherwise. + """ + if not self.wait_for_real_status_to_change_to( + status, timeout, update_db): + LOG.info(_("Service status did not change to %(status)s " + "within the given timeout: %(timeout)ds") + % {'status': status, 'timeout': timeout}) + LOG.debug("Attempting to cleanup stalled services.") + try: + self.cleanup_stalled_db_services() + except Exception: + LOG.debug("Cleanup failed.", exc_info=True) + return False + + return True + def wait_for_real_status_to_change_to(self, status, max_time, update_db=False): """ @@ -164,6 +294,12 @@ class BaseDbStatus(object): LOG.error(_("Timeout while waiting for database status to change.")) return False + def cleanup_stalled_db_services(self): + """An optional datastore-specific code to cleanup stalled + database services and other resources after a status change timeout. + """ + LOG.debug("No cleanup action specified for this datastore.") + def report_root(self, context, user): """Use conductor to update the root-enable status.""" LOG.debug("Casting report_root message to conductor.") diff --git a/trove/tests/unittests/guestagent/test_dbaas.py b/trove/tests/unittests/guestagent/test_dbaas.py index 9cab6a98..c60475cd 100644 --- a/trove/tests/unittests/guestagent/test_dbaas.py +++ b/trove/tests/unittests/guestagent/test_dbaas.py @@ -1936,6 +1936,179 @@ class BaseDbStatusTest(testtools.TestCase): rd_instance.ServiceStatuses.BUILD_PENDING, rd_instance.ServiceStatuses.BUILD_PENDING) + def test_wait_for_database_service_status(self): + status = BaseDbStatus() + expected_status = rd_instance.ServiceStatuses.RUNNING + timeout = 10 + update_db = False + + # Test a successful call. + with patch.multiple( + status, + wait_for_real_status_to_change_to=Mock(return_value=True), + cleanup_stalled_db_services=DEFAULT): + self.assertTrue( + status._wait_for_database_service_status( + expected_status, timeout, update_db)) + status.wait_for_real_status_to_change_to.assert_called_once_with( + expected_status, timeout, update_db) + self.assertFalse(status.cleanup_stalled_db_services.called) + + # Test a failing call. + with patch.multiple( + status, + wait_for_real_status_to_change_to=Mock(return_value=False), + cleanup_stalled_db_services=DEFAULT): + self.assertFalse( + status._wait_for_database_service_status( + expected_status, timeout, update_db)) + status.wait_for_real_status_to_change_to.assert_called_once_with( + expected_status, timeout, update_db) + status.cleanup_stalled_db_services.assert_called_once_with() + + # Test a failing call with an error raised from the cleanup code. + # No exception should propagate out of the cleanup block. + with patch.multiple( + status, + wait_for_real_status_to_change_to=Mock(return_value=False), + cleanup_stalled_db_services=Mock( + side_effect=Exception("Error in cleanup."))): + self.assertFalse( + status._wait_for_database_service_status( + expected_status, timeout, update_db)) + status.wait_for_real_status_to_change_to.assert_called_once_with( + expected_status, timeout, update_db) + status.cleanup_stalled_db_services.assert_called_once_with() + + def test_start_db_service(self): + status = BaseDbStatus() + service_candidates = ['name1', 'name2'] + + # Test a successful call with setting auto-start enabled. + with patch.object( + status, '_wait_for_database_service_status', + return_value=True) as service_call: + with patch.multiple(operating_system, start_service=DEFAULT, + enable_service_on_boot=DEFAULT) as os_cmd: + status.start_db_service( + service_candidates, 10, enable_on_boot=True) + service_call.assert_called_once_with( + rd_instance.ServiceStatuses.RUNNING, 10, False) + os_cmd['start_service'].assert_called_once_with( + service_candidates) + os_cmd['enable_service_on_boot'].assert_called_once_with( + service_candidates) + + # Test a successful call without auto-start. + with patch.object( + status, '_wait_for_database_service_status', + return_value=True) as service_call: + with patch.multiple(operating_system, start_service=DEFAULT, + enable_service_on_boot=DEFAULT) as os_cmd: + status.start_db_service( + service_candidates, 10, enable_on_boot=False) + service_call.assert_called_once_with( + rd_instance.ServiceStatuses.RUNNING, 10, False) + os_cmd['start_service'].assert_called_once_with( + service_candidates) + self.assertFalse(os_cmd['enable_service_on_boot'].called) + + # Test a failing call. + # The auto-start setting should not get updated if the service call + # fails. + with patch.object( + status, '_wait_for_database_service_status', + return_value=False) as service_call: + with patch.multiple(operating_system, start_service=DEFAULT, + enable_service_on_boot=DEFAULT) as os_cmd: + self.assertRaisesRegexp( + RuntimeError, "Database failed to start.", + status.start_db_service, + service_candidates, 10, enable_on_boot=True) + os_cmd['start_service'].assert_called_once_with( + service_candidates) + self.assertFalse(os_cmd['enable_service_on_boot'].called) + + def test_stop_db_service(self): + status = BaseDbStatus() + service_candidates = ['name1', 'name2'] + + # Test a successful call with setting auto-start disabled. + with patch.object( + status, '_wait_for_database_service_status', + return_value=True) as service_call: + with patch.multiple(operating_system, stop_service=DEFAULT, + disable_service_on_boot=DEFAULT) as os_cmd: + status.stop_db_service( + service_candidates, 10, disable_on_boot=True) + service_call.assert_called_once_with( + rd_instance.ServiceStatuses.SHUTDOWN, 10, False) + os_cmd['stop_service'].assert_called_once_with( + service_candidates) + os_cmd['disable_service_on_boot'].assert_called_once_with( + service_candidates) + + # Test a successful call without auto-start. + with patch.object( + status, '_wait_for_database_service_status', + return_value=True) as service_call: + with patch.multiple(operating_system, stop_service=DEFAULT, + disable_service_on_boot=DEFAULT) as os_cmd: + status.stop_db_service( + service_candidates, 10, disable_on_boot=False) + service_call.assert_called_once_with( + rd_instance.ServiceStatuses.SHUTDOWN, 10, False) + os_cmd['stop_service'].assert_called_once_with( + service_candidates) + self.assertFalse(os_cmd['disable_service_on_boot'].called) + + # Test a failing call. + # The auto-start setting should not get updated if the service call + # fails. + with patch.object( + status, '_wait_for_database_service_status', + return_value=False) as service_call: + with patch.multiple(operating_system, stop_service=DEFAULT, + disable_service_on_boot=DEFAULT) as os_cmd: + self.assertRaisesRegexp( + RuntimeError, "Database failed to stop.", + status.stop_db_service, + service_candidates, 10, disable_on_boot=True) + os_cmd['stop_service'].assert_called_once_with( + service_candidates) + self.assertFalse(os_cmd['disable_service_on_boot'].called) + + def test_restart_db_service(self): + status = BaseDbStatus() + service_candidates = ['name1', 'name2'] + + # Test the restart flow (stop followed by start). + # Assert that the auto-start setting does not get changed and the + # Trove instance status updates are suppressed during restart. + with patch.multiple( + status, start_db_service=DEFAULT, stop_db_service=DEFAULT, + begin_restart=DEFAULT, end_install_or_restart=DEFAULT): + status.restart_db_service(service_candidates, 10) + status.begin_restart.assert_called_once_with() + status.stop_db_service.assert_called_once_with( + service_candidates, 10, disable_on_boot=False, update_db=False) + status.start_db_service.assert_called_once_with( + service_candidates, 10, enable_on_boot=False, update_db=False) + status.end_install_or_restart.assert_called_once_with() + + # Test a failing call. + # Assert the status heartbeat gets re-enabled. + with patch.multiple( + status, start_db_service=Mock( + side_effect=Exception("Error in database start.")), + stop_db_service=DEFAULT, begin_restart=DEFAULT, + end_install_or_restart=DEFAULT): + self.assertRaisesRegexp( + RuntimeError, "Database restart failed.", + status.restart_db_service, service_candidates, 10) + status.begin_restart.assert_called_once_with() + status.end_install_or_restart.assert_called_once_with() + class MySqlAppStatusTest(testtools.TestCase): |