summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jenkins <jenkins@review.openstack.org> 2015-09-23 19:06:47 +0000
committer: Gerrit Code Review <review@openstack.org> 2015-09-23 19:06:47 +0000
commit: 9dafaf4929af838a14df41c59b9c413c64479b55 (patch)
tree: 179a31dd49c26361509cd5f7a80dc4c879d50c87
parent: f4a1e2e0a3a6262cbc8d95045f49a19cf45f0e4a (diff)
parent: f7f15f50dd54887dc57c9c0175bcc26953f28fa8 (diff)
download: trove-9dafaf4929af838a14df41c59b9c413c64479b55.tar.gz
Merge "Fix Postgres services management"
-rw-r--r-- trove/guestagent/datastore/experimental/postgresql/service/process.py | 82
-rw-r--r-- trove/guestagent/datastore/service.py | 136
-rw-r--r-- trove/tests/unittests/guestagent/test_dbaas.py | 173
3 files changed, 322 insertions, 69 deletions
diff --git a/trove/guestagent/datastore/experimental/postgresql/service/process.py b/trove/guestagent/datastore/experimental/postgresql/service/process.py
index 4495895d..a4fda138 100644
--- a/trove/guestagent/datastore/experimental/postgresql/service/process.py
+++ b/trove/guestagent/datastore/experimental/postgresql/service/process.py
@@ -16,83 +16,27 @@
from oslo_log import log as logging
from trove.common import cfg
-from trove.common import utils
-from trove.guestagent.common import operating_system
from trove.guestagent.datastore.experimental.postgresql.service.status import (
PgSqlAppStatus)
LOG = logging.getLogger(__name__)
CONF = cfg.CONF
-
-PGSQL_SERVICE_CANDIDATES = ("postgresql",)
+PGSQL_SERVICE_CANDIDATES = ["postgresql"]
class PgSqlProcess(object):
"""Mixin that manages the PgSql process."""
- def start_db(self, context):
- self._enable_pgsql_on_boot()
- """Start the PgSql service."""
- cmd = operating_system.service_discovery(PGSQL_SERVICE_CANDIDATES)
- LOG.info(
- _("{guest_id}: Starting database engine with command ({command}).")
- .format(
- guest_id=CONF.guest_id,
- command=cmd['cmd_start'],
- )
- )
- utils.execute_with_timeout(
- *cmd['cmd_start'].split(),
- timeout=30
- )
-
- def _enable_pgsql_on_boot(self):
- try:
- pgsql_service = operating_system.service_discovery(
- PGSQL_SERVICE_CANDIDATES)
- utils.execute_with_timeout(pgsql_service['cmd_enable'],
- shell=True)
- except KeyError:
- LOG.exception(_("Error enabling PostgreSQL start on boot."))
- raise RuntimeError("Service is not discovered.")
-
- def _disable_pgsql_on_boot(self):
- try:
- pgsql_service = operating_system.service_discovery(
- PGSQL_SERVICE_CANDIDATES)
- utils.execute_with_timeout(pgsql_service['cmd_disable'],
- shell=True)
- except KeyError:
- LOG.exception(_("Error disabling PostgreSQL start on boot."))
- raise RuntimeError("Service is not discovered.")
-
- def stop_db(self, context, do_not_start_on_reboot=False):
- """Stop the PgSql service."""
- if do_not_start_on_reboot:
- self._disable_pgsql_on_boot()
- cmd = operating_system.service_discovery(PGSQL_SERVICE_CANDIDATES)
- LOG.info(
- _("{guest_id}: Stopping database engine with command ({command}).")
- .format(
- guest_id=CONF.guest_id,
- command=cmd['cmd_stop'],
- )
- )
- utils.execute_with_timeout(
- *cmd['cmd_stop'].split(),
- timeout=30
- )
-
def restart(self, context):
- """Restart the PgSql service."""
- LOG.info(
- _("{guest_id}: Restarting database engine.").format(
- guest_id=CONF.guest_id,
- )
- )
- try:
- PgSqlAppStatus.get().begin_restart()
- self.stop_db(context)
- self.start_db(context)
- finally:
- PgSqlAppStatus.get().end_install_or_restart()
+ PgSqlAppStatus.get().restart_db_service(
+ PGSQL_SERVICE_CANDIDATES, CONF.state_change_wait_time)
+
+ def start_db(self, context, enable_on_boot=True, update_db=False):
+ PgSqlAppStatus.get().start_db_service(
+ PGSQL_SERVICE_CANDIDATES, CONF.state_change_wait_time,
+ enable_on_boot=enable_on_boot, update_db=update_db)
+
+ def stop_db(self, context, do_not_start_on_reboot=False, update_db=False):
+ PgSqlAppStatus.get().stop_db_service(
+ PGSQL_SERVICE_CANDIDATES, CONF.state_change_wait_time,
+ disable_on_boot=do_not_start_on_reboot, update_db=update_db)
diff --git a/trove/guestagent/datastore/service.py b/trove/guestagent/datastore/service.py
index 630a502c..a1a5d75b 100644
--- a/trove/guestagent/datastore/service.py
+++ b/trove/guestagent/datastore/service.py
@@ -23,6 +23,7 @@ from trove.common import context as trove_context
from trove.common.i18n import _
from trove.common import instance
from trove.conductor import api as conductor_api
+from trove.guestagent.common import operating_system
from trove.guestagent.common import timeutils
LOG = logging.getLogger(__name__)
@@ -141,6 +142,135 @@ class BaseDbStatus(object):
"for now we'll skip determining the status of DB on "
"this instance."))
+ def restart_db_service(self, service_candidates, timeout):
+ """Restart the database.
+ Do not change the service auto-start setting.
+ Disable the Trove instance heartbeat updates during the restart.
+
+ 1. Stop the database service.
+ 2. Wait for the database to shut down.
+ 3. Start the database service.
+ 4. Wait for the database to start running.
+
+ :param service_candidates: List of possible system service names.
+ :type service_candidates: list
+
+ :param timeout: Wait timeout in seconds.
+ :type timeout: integer
+
+ :raises: :class:`RuntimeError` on failure.
+ """
+ try:
+ self.begin_restart()
+ self.stop_db_service(service_candidates, timeout,
+ disable_on_boot=False, update_db=False)
+ self.start_db_service(service_candidates, timeout,
+ enable_on_boot=False, update_db=False)
+ except Exception as e:
+ LOG.exception(e)
+ raise RuntimeError(_("Database restart failed."))
+ finally:
+ self.end_install_or_restart()
+
+ def start_db_service(self, service_candidates, timeout,
+ enable_on_boot=True, update_db=False):
+ """Start the database service and wait for the database to become
+ available.
+ The service auto-start will be updated only if the service command
+ succeeds.
+
+ :param service_candidates: List of possible system service names.
+ :type service_candidates: list
+
+ :param timeout: Wait timeout in seconds.
+ :type timeout: integer
+
+ :param enable_on_boot: Enable service auto-start.
+ The auto-start setting will be updated
+ only if the service command succeeds.
+ :type enable_on_boot: boolean
+
+ :param update_db: Suppress the Trove instance heartbeat.
+ :type update_db: boolean
+
+ :raises: :class:`RuntimeError` on failure.
+ """
+ LOG.info(_("Starting database service."))
+ operating_system.start_service(service_candidates)
+
+ LOG.debug("Waiting for database to start up.")
+ if not self._wait_for_database_service_status(
+ instance.ServiceStatuses.RUNNING, timeout, update_db):
+ raise RuntimeError(_("Database failed to start."))
+
+ LOG.info(_("Database has started successfully."))
+
+ if enable_on_boot:
+ LOG.info(_("Enable service auto-start on boot."))
+ operating_system.enable_service_on_boot(service_candidates)
+
+ def stop_db_service(self, service_candidates, timeout,
+ disable_on_boot=False, update_db=False):
+ """Stop the database service and wait for the database to shutdown.
+
+ :param service_candidates: List of possible system service names.
+ :type service_candidates: list
+
+ :param timeout: Wait timeout in seconds.
+ :type timeout: integer
+
+ :param disable_on_boot: Disable service auto-start.
+ The auto-start setting will be updated
+ only if the service command succeeds.
+ :type disable_on_boot: boolean
+
+ :param update_db: Suppress the Trove instance heartbeat.
+ :type update_db: boolean
+
+ :raises: :class:`RuntimeError` on failure.
+ """
+ LOG.info(_("Stopping database service."))
+ operating_system.stop_service(service_candidates)
+
+ LOG.debug("Waiting for database to shutdown.")
+ if not self._wait_for_database_service_status(
+ instance.ServiceStatuses.SHUTDOWN, timeout, update_db):
+ raise RuntimeError(_("Database failed to stop."))
+
+ LOG.info(_("Database has stopped successfully."))
+
+ if disable_on_boot:
+ LOG.info(_("Disable service auto-start on boot."))
+ operating_system.disable_service_on_boot(service_candidates)
+
+ def _wait_for_database_service_status(self, status, timeout, update_db):
+ """Wait for the given database status.
+
+ :param status: The status to wait for.
+ :type status: member of instance.ServiceStatuses
+
+ :param timeout: Wait timeout in seconds.
+ :type timeout: integer
+
+ :param update_db: Suppress the Trove instance heartbeat.
+ :type update_db: boolean
+
+ :returns: True on success, False otherwise.
+ """
+ if not self.wait_for_real_status_to_change_to(
+ status, timeout, update_db):
+ LOG.info(_("Service status did not change to %(status)s "
+ "within the given timeout: %(timeout)ds")
+ % {'status': status, 'timeout': timeout})
+ LOG.debug("Attempting to cleanup stalled services.")
+ try:
+ self.cleanup_stalled_db_services()
+ except Exception:
+ LOG.debug("Cleanup failed.", exc_info=True)
+ return False
+
+ return True
+
def wait_for_real_status_to_change_to(self, status, max_time,
update_db=False):
"""
@@ -164,6 +294,12 @@ class BaseDbStatus(object):
LOG.error(_("Timeout while waiting for database status to change."))
return False
+ def cleanup_stalled_db_services(self):
+ """An optional datastore-specific code to cleanup stalled
+ database services and other resources after a status change timeout.
+ """
+ LOG.debug("No cleanup action specified for this datastore.")
+
def report_root(self, context, user):
"""Use conductor to update the root-enable status."""
LOG.debug("Casting report_root message to conductor.")
diff --git a/trove/tests/unittests/guestagent/test_dbaas.py b/trove/tests/unittests/guestagent/test_dbaas.py
index 9cab6a98..c60475cd 100644
--- a/trove/tests/unittests/guestagent/test_dbaas.py
+++ b/trove/tests/unittests/guestagent/test_dbaas.py
@@ -1936,6 +1936,179 @@ class BaseDbStatusTest(testtools.TestCase):
rd_instance.ServiceStatuses.BUILD_PENDING,
rd_instance.ServiceStatuses.BUILD_PENDING)
+ def test_wait_for_database_service_status(self):
+ status = BaseDbStatus()
+ expected_status = rd_instance.ServiceStatuses.RUNNING
+ timeout = 10
+ update_db = False
+
+ # Test a successful call.
+ with patch.multiple(
+ status,
+ wait_for_real_status_to_change_to=Mock(return_value=True),
+ cleanup_stalled_db_services=DEFAULT):
+ self.assertTrue(
+ status._wait_for_database_service_status(
+ expected_status, timeout, update_db))
+ status.wait_for_real_status_to_change_to.assert_called_once_with(
+ expected_status, timeout, update_db)
+ self.assertFalse(status.cleanup_stalled_db_services.called)
+
+ # Test a failing call.
+ with patch.multiple(
+ status,
+ wait_for_real_status_to_change_to=Mock(return_value=False),
+ cleanup_stalled_db_services=DEFAULT):
+ self.assertFalse(
+ status._wait_for_database_service_status(
+ expected_status, timeout, update_db))
+ status.wait_for_real_status_to_change_to.assert_called_once_with(
+ expected_status, timeout, update_db)
+ status.cleanup_stalled_db_services.assert_called_once_with()
+
+ # Test a failing call with an error raised from the cleanup code.
+ # No exception should propagate out of the cleanup block.
+ with patch.multiple(
+ status,
+ wait_for_real_status_to_change_to=Mock(return_value=False),
+ cleanup_stalled_db_services=Mock(
+ side_effect=Exception("Error in cleanup."))):
+ self.assertFalse(
+ status._wait_for_database_service_status(
+ expected_status, timeout, update_db))
+ status.wait_for_real_status_to_change_to.assert_called_once_with(
+ expected_status, timeout, update_db)
+ status.cleanup_stalled_db_services.assert_called_once_with()
+
+ def test_start_db_service(self):
+ status = BaseDbStatus()
+ service_candidates = ['name1', 'name2']
+
+ # Test a successful call with setting auto-start enabled.
+ with patch.object(
+ status, '_wait_for_database_service_status',
+ return_value=True) as service_call:
+ with patch.multiple(operating_system, start_service=DEFAULT,
+ enable_service_on_boot=DEFAULT) as os_cmd:
+ status.start_db_service(
+ service_candidates, 10, enable_on_boot=True)
+ service_call.assert_called_once_with(
+ rd_instance.ServiceStatuses.RUNNING, 10, False)
+ os_cmd['start_service'].assert_called_once_with(
+ service_candidates)
+ os_cmd['enable_service_on_boot'].assert_called_once_with(
+ service_candidates)
+
+ # Test a successful call without auto-start.
+ with patch.object(
+ status, '_wait_for_database_service_status',
+ return_value=True) as service_call:
+ with patch.multiple(operating_system, start_service=DEFAULT,
+ enable_service_on_boot=DEFAULT) as os_cmd:
+ status.start_db_service(
+ service_candidates, 10, enable_on_boot=False)
+ service_call.assert_called_once_with(
+ rd_instance.ServiceStatuses.RUNNING, 10, False)
+ os_cmd['start_service'].assert_called_once_with(
+ service_candidates)
+ self.assertFalse(os_cmd['enable_service_on_boot'].called)
+
+ # Test a failing call.
+ # The auto-start setting should not get updated if the service call
+ # fails.
+ with patch.object(
+ status, '_wait_for_database_service_status',
+ return_value=False) as service_call:
+ with patch.multiple(operating_system, start_service=DEFAULT,
+ enable_service_on_boot=DEFAULT) as os_cmd:
+ self.assertRaisesRegexp(
+ RuntimeError, "Database failed to start.",
+ status.start_db_service,
+ service_candidates, 10, enable_on_boot=True)
+ os_cmd['start_service'].assert_called_once_with(
+ service_candidates)
+ self.assertFalse(os_cmd['enable_service_on_boot'].called)
+
+ def test_stop_db_service(self):
+ status = BaseDbStatus()
+ service_candidates = ['name1', 'name2']
+
+ # Test a successful call with setting auto-start disabled.
+ with patch.object(
+ status, '_wait_for_database_service_status',
+ return_value=True) as service_call:
+ with patch.multiple(operating_system, stop_service=DEFAULT,
+ disable_service_on_boot=DEFAULT) as os_cmd:
+ status.stop_db_service(
+ service_candidates, 10, disable_on_boot=True)
+ service_call.assert_called_once_with(
+ rd_instance.ServiceStatuses.SHUTDOWN, 10, False)
+ os_cmd['stop_service'].assert_called_once_with(
+ service_candidates)
+ os_cmd['disable_service_on_boot'].assert_called_once_with(
+ service_candidates)
+
+ # Test a successful call without auto-start.
+ with patch.object(
+ status, '_wait_for_database_service_status',
+ return_value=True) as service_call:
+ with patch.multiple(operating_system, stop_service=DEFAULT,
+ disable_service_on_boot=DEFAULT) as os_cmd:
+ status.stop_db_service(
+ service_candidates, 10, disable_on_boot=False)
+ service_call.assert_called_once_with(
+ rd_instance.ServiceStatuses.SHUTDOWN, 10, False)
+ os_cmd['stop_service'].assert_called_once_with(
+ service_candidates)
+ self.assertFalse(os_cmd['disable_service_on_boot'].called)
+
+ # Test a failing call.
+ # The auto-start setting should not get updated if the service call
+ # fails.
+ with patch.object(
+ status, '_wait_for_database_service_status',
+ return_value=False) as service_call:
+ with patch.multiple(operating_system, stop_service=DEFAULT,
+ disable_service_on_boot=DEFAULT) as os_cmd:
+ self.assertRaisesRegexp(
+ RuntimeError, "Database failed to stop.",
+ status.stop_db_service,
+ service_candidates, 10, disable_on_boot=True)
+ os_cmd['stop_service'].assert_called_once_with(
+ service_candidates)
+ self.assertFalse(os_cmd['disable_service_on_boot'].called)
+
+ def test_restart_db_service(self):
+ status = BaseDbStatus()
+ service_candidates = ['name1', 'name2']
+
+ # Test the restart flow (stop followed by start).
+ # Assert that the auto-start setting does not get changed and the
+ # Trove instance status updates are suppressed during restart.
+ with patch.multiple(
+ status, start_db_service=DEFAULT, stop_db_service=DEFAULT,
+ begin_restart=DEFAULT, end_install_or_restart=DEFAULT):
+ status.restart_db_service(service_candidates, 10)
+ status.begin_restart.assert_called_once_with()
+ status.stop_db_service.assert_called_once_with(
+ service_candidates, 10, disable_on_boot=False, update_db=False)
+ status.start_db_service.assert_called_once_with(
+ service_candidates, 10, enable_on_boot=False, update_db=False)
+ status.end_install_or_restart.assert_called_once_with()
+
+ # Test a failing call.
+ # Assert the status heartbeat gets re-enabled.
+ with patch.multiple(
+ status, start_db_service=Mock(
+ side_effect=Exception("Error in database start.")),
+ stop_db_service=DEFAULT, begin_restart=DEFAULT,
+ end_install_or_restart=DEFAULT):
+ self.assertRaisesRegexp(
+ RuntimeError, "Database restart failed.",
+ status.restart_db_service, service_candidates, 10)
+ status.begin_restart.assert_called_once_with()
+ status.end_install_or_restart.assert_called_once_with()
+
class MySqlAppStatusTest(testtools.TestCase):