diff options
author | Lingxian Kong <anlin.kong@gmail.com> | 2020-05-31 00:01:05 +1200 |
---|---|---|
committer | Lingxian Kong <anlin.kong@gmail.com> | 2020-06-04 10:12:04 +1200 |
commit | dc117d8dd829c64971baefdbaca835aa5c7fb7b7 (patch) | |
tree | 15875afbbcf7edd1519bfc6d78f94603c08462cb /trove/guestagent | |
parent | ff4b6a13397b6b7102cddff70098b23a2d6ab606 (diff) | |
download | trove-dc117d8dd829c64971baefdbaca835aa5c7fb7b7.tar.gz |
Set status to ERROR if heartbeat expires
Change-Id: Ib8f5062094c0ec7766d4c6c6e7c3c8168e15ebd6
Diffstat (limited to 'trove/guestagent')
-rw-r--r-- | trove/guestagent/api.py | 18 | ||||
-rw-r--r-- | trove/guestagent/datastore/manager.py | 9 | ||||
-rw-r--r-- | trove/guestagent/datastore/mysql_common/manager.py | 14 | ||||
-rw-r--r-- | trove/guestagent/datastore/mysql_common/service.py | 41 | ||||
-rw-r--r-- | trove/guestagent/datastore/service.py | 26 |
5 files changed, 71 insertions, 37 deletions
diff --git a/trove/guestagent/api.py b/trove/guestagent/api.py index 27e626e0..67c3baa5 100644 --- a/trove/guestagent/api.py +++ b/trove/guestagent/api.py @@ -380,6 +380,14 @@ class API(object): self.agent_high_timeout, version=version, upgrade_info=upgrade_info) + def upgrade(self, upgrade_info): + """Upgrade database service.""" + LOG.debug("Sending the call to upgrade database service.") + version = self.API_BASE_VERSION + + return self._cast("upgrade", version=version, + upgrade_info=upgrade_info) + def restart(self): """Restart the database server.""" LOG.debug("Sending the call to restart the database process " @@ -419,16 +427,6 @@ class API(object): self._call("stop_db", self.agent_low_timeout, version=version) - def upgrade(self, instance_version, location, metadata=None): - """Make an asynchronous call to self upgrade the guest agent.""" - LOG.debug("Sending an upgrade call to nova-guest.") - version = self.API_BASE_VERSION - - self._cast("upgrade", version=version, - instance_version=instance_version, - location=location, - metadata=metadata) - def get_volume_info(self): """Make a synchronous call to get volume info for the container.""" LOG.debug("Check Volume Info on instance %s.", self.id) diff --git a/trove/guestagent/datastore/manager.py b/trove/guestagent/datastore/manager.py index 0a639417..a7e126f1 100644 --- a/trove/guestagent/datastore/manager.py +++ b/trove/guestagent/datastore/manager.py @@ -25,7 +25,6 @@ from oslo_service import periodic_task from trove.common import cfg from trove.common import exception -from trove.common import instance from trove.common.i18n import _ from trove.common.notification import EndNotification from trove.guestagent import dbaas @@ -37,6 +36,7 @@ from trove.guestagent.common.operating_system import FileMode from trove.guestagent.module import driver_manager from trove.guestagent.module import module_manager from trove.guestagent.strategies import replication as repl_strategy +from trove.instance import service_status LOG = logging.getLogger(__name__) CONF = cfg.CONF @@ -306,6 +306,10 @@ class Manager(periodic_task.PeriodicTasks): """ return {} + def upgrade(self, context, upgrade_info): + """Upgrade the database.""" + pass + def post_upgrade(self, context, upgrade_info): """Recovers the guest after the image is upgraded using information from the pre_upgrade step @@ -588,7 +592,8 @@ class Manager(periodic_task.PeriodicTasks): self.configuration_manager.apply_system_override( config_man_values, change_id=apply_label, pre_user=True) if restart_required: - self.status.set_status(instance.ServiceStatuses.RESTART_REQUIRED) + self.status.set_status( + service_status.ServiceStatuses.RESTART_REQUIRED) else: self.apply_overrides(context, cfg_values) diff --git a/trove/guestagent/datastore/mysql_common/manager.py b/trove/guestagent/datastore/mysql_common/manager.py index 2d31e2f6..eeb1707a 100644 --- a/trove/guestagent/datastore/mysql_common/manager.py +++ b/trove/guestagent/datastore/mysql_common/manager.py @@ -22,7 +22,6 @@ from oslo_log import log as logging from trove.common import cfg from trove.common import configurations from trove.common import exception -from trove.common import instance as rd_instance from trove.common import utils from trove.common.notification import EndNotification from trove.guestagent import guest_log @@ -32,6 +31,7 @@ from trove.guestagent.datastore import manager from trove.guestagent.strategies import replication as repl_strategy from trove.guestagent.utils import docker as docker_util from trove.guestagent.utils import mysql as mysql_util +from trove.instance import service_status LOG = logging.getLogger(__name__) CONF = cfg.CONF @@ -71,7 +71,7 @@ class MySqlManager(manager.Manager): client.execute(cmd) LOG.debug("Database service check: database query is responsive") - return rd_instance.ServiceStatuses.HEALTHY + return service_status.ServiceStatuses.HEALTHY except Exception: return super(MySqlManager, self).get_service_status() @@ -295,7 +295,7 @@ class MySqlManager(manager.Manager): self.app.restore_backup(context, backup_info, restore_location) except Exception: LOG.error("Failed to restore from backup %s.", backup_info['id']) - self.status.set_status(rd_instance.ServiceStatuses.FAILED) + self.status.set_status(service_status.ServiceStatuses.FAILED) raise LOG.info("Finished restore data from backup %s", backup_info['id']) @@ -365,7 +365,7 @@ class MySqlManager(manager.Manager): slave_config) except Exception as err: LOG.error("Error enabling replication, error: %s", str(err)) - self.status.set_status(rd_instance.ServiceStatuses.FAILED) + self.status.set_status(service_status.ServiceStatuses.FAILED) raise def detach_replica(self, context, for_failover=False): @@ -431,3 +431,9 @@ class MySqlManager(manager.Manager): def demote_replication_master(self, context): LOG.info("Demoting replication master.") self.replication.demote_master(self.app) + + def upgrade(self, context, upgrade_info): + """Upgrade the database.""" + LOG.info('Starting to upgrade database, upgrade_info: %s', + upgrade_info) + self.app.upgrade(upgrade_info) diff --git a/trove/guestagent/datastore/mysql_common/service.py b/trove/guestagent/datastore/mysql_common/service.py index c98b3279..77b10d81 100644 --- a/trove/guestagent/datastore/mysql_common/service.py +++ b/trove/guestagent/datastore/mysql_common/service.py @@ -27,7 +27,6 @@ from sqlalchemy.sql.expression import text from trove.backup.state import BackupState from trove.common import cfg from trove.common import exception -from trove.common import instance from trove.common import utils from trove.common.configurations import MySQLConfParser from trove.common.db.mysql import models @@ -43,6 +42,7 @@ from trove.guestagent.datastore import service from trove.guestagent.datastore.mysql_common import service as commmon_service from trove.guestagent.utils import docker as docker_util from trove.guestagent.utils import mysql as mysql_util +from trove.instance import service_status LOG = logging.getLogger(__name__) CONF = cfg.CONF @@ -77,24 +77,24 @@ class BaseMySqlAppStatus(service.BaseDbStatus): cmd = 'mysql -uroot -p%s -e "select 1;"' % root_pass try: docker_util.run_command(self.docker_client, cmd) - return instance.ServiceStatuses.HEALTHY + return service_status.ServiceStatuses.HEALTHY except Exception as exc: LOG.warning('Failed to run docker command, error: %s', str(exc)) container_log = docker_util.get_container_logs( self.docker_client, tail='all') - LOG.warning('container log: %s', '\n'.join(container_log)) - return instance.ServiceStatuses.RUNNING + LOG.debug('container log: \n%s', '\n'.join(container_log)) + return service_status.ServiceStatuses.RUNNING elif status == "not running": - return instance.ServiceStatuses.SHUTDOWN + return service_status.ServiceStatuses.SHUTDOWN elif status == "paused": - return instance.ServiceStatuses.PAUSED + return service_status.ServiceStatuses.PAUSED elif status == "exited": - return instance.ServiceStatuses.SHUTDOWN + return service_status.ServiceStatuses.SHUTDOWN elif status == "dead": - return instance.ServiceStatuses.CRASHED + return service_status.ServiceStatuses.CRASHED else: - return instance.ServiceStatuses.UNKNOWN + return service_status.ServiceStatuses.UNKNOWN @six.add_metaclass(abc.ABCMeta) @@ -638,8 +638,9 @@ class BaseMySqlApp(object): raise exception.TroveError(_("Failed to start mysql")) if not self.status.wait_for_real_status_to_change_to( - instance.ServiceStatuses.HEALTHY, - CONF.state_change_wait_time, update_db): + service_status.ServiceStatuses.HEALTHY, + CONF.state_change_wait_time, update_db + ): raise exception.TroveError(_("Failed to start mysql")) def start_db_with_conf_changes(self, config_contents): @@ -662,7 +663,7 @@ class BaseMySqlApp(object): raise exception.TroveError("Failed to stop mysql") if not self.status.wait_for_real_status_to_change_to( - instance.ServiceStatuses.SHUTDOWN, + service_status.ServiceStatuses.SHUTDOWN, CONF.state_change_wait_time, update_db): raise exception.TroveError("Failed to stop mysql") @@ -714,7 +715,7 @@ class BaseMySqlApp(object): raise exception.TroveError("Failed to restart mysql") if not self.status.wait_for_real_status_to_change_to( - instance.ServiceStatuses.HEALTHY, + service_status.ServiceStatuses.HEALTHY, CONF.state_change_wait_time, update_db=False): raise exception.TroveError("Failed to start mysql") @@ -949,6 +950,20 @@ class BaseMySqlApp(object): q = "set global read_only = %s" % read_only client.execute(text(str(q))) + def upgrade(self, upgrade_info): + """Upgrade the database.""" + new_version = upgrade_info.get('datastore_version') + + LOG.info('Stopping db container for upgrade') + self.stop_db() + + LOG.info('Deleting db container for upgrade') + docker_util.remove_container(self.docker_client) + + LOG.info('Starting new db container with version %s for upgrade', + new_version) + self.start_db(update_db=True, ds_version=new_version) + class BaseMySqlRootAccess(object): def __init__(self, mysql_app): diff --git a/trove/guestagent/datastore/service.py b/trove/guestagent/datastore/service.py index 63a5464e..7ac5fb03 100644 --- a/trove/guestagent/datastore/service.py +++ b/trove/guestagent/datastore/service.py @@ -20,11 +20,11 @@ from oslo_utils import timeutils from trove.common import cfg from trove.common import context as trove_context -from trove.common import instance from trove.common.i18n import _ from trove.conductor import api as conductor_api from trove.guestagent.common import guestagent_utils from trove.guestagent.common import operating_system +from trove.instance import service_status LOG = logging.getLogger(__name__) CONF = cfg.CONF @@ -74,7 +74,7 @@ class BaseDbStatus(object): operating_system.write_file(prepare_start_file, '') self.__refresh_prepare_completed() - self.set_status(instance.ServiceStatuses.BUILDING, True) + self.set_status(service_status.ServiceStatuses.BUILDING, True) def set_ready(self): prepare_end_file = guestagent_utils.build_file_path( @@ -92,9 +92,9 @@ class BaseDbStatus(object): final_status = None if error_occurred: - final_status = instance.ServiceStatuses.FAILED + final_status = service_status.ServiceStatuses.FAILED elif post_processing: - final_status = instance.ServiceStatuses.INSTANCE_READY + final_status = service_status.ServiceStatuses.INSTANCE_READY if final_status: LOG.info("Set final status to %s.", final_status) @@ -126,8 +126,8 @@ class BaseDbStatus(object): def is_running(self): """True if DB server is running.""" return (self.status is not None and - self.status in [instance.ServiceStatuses.RUNNING, - instance.ServiceStatuses.HEALTHY]) + self.status in [service_status.ServiceStatuses.RUNNING, + service_status.ServiceStatuses.HEALTHY]) def set_status(self, status, force=False): """Use conductor to update the DB app status.""" @@ -199,7 +199,7 @@ class BaseDbStatus(object): """ LOG.debug("Waiting for database to start up.") if not self._wait_for_database_service_status( - instance.ServiceStatuses.RUNNING, timeout, update_db): + service_status.ServiceStatuses.RUNNING, timeout, update_db): raise RuntimeError(_("Database failed to start.")) LOG.info("Database has started successfully.") @@ -229,7 +229,7 @@ class BaseDbStatus(object): LOG.debug("Waiting for database to shutdown.") if not self._wait_for_database_service_status( - instance.ServiceStatuses.SHUTDOWN, timeout, update_db): + service_status.ServiceStatuses.SHUTDOWN, timeout, update_db): raise RuntimeError(_("Database failed to stop.")) LOG.info("Database has stopped successfully.") @@ -283,9 +283,19 @@ class BaseDbStatus(object): # outside. loop = True + # We need 3 (by default) consecutive success db connections for status + # 'HEALTHY' + healthy_count = 0 + while loop: self.status = self.get_actual_db_status() if self.status == status: + if (status == service_status.ServiceStatuses.HEALTHY and + healthy_count < 2): + healthy_count += 1 + time.sleep(CONF.state_change_poll_time) + continue + if update_db: self.set_status(self.status) return True |