summary | refs | log | tree | commit | diff
path: root/trove/guestagent
diff options
context:
space:
mode:
author Lingxian Kong <anlin.kong@gmail.com> 2020-05-31 00:01:05 +1200
committer Lingxian Kong <anlin.kong@gmail.com> 2020-06-04 10:12:04 +1200
commit dc117d8dd829c64971baefdbaca835aa5c7fb7b7 (patch)
tree 15875afbbcf7edd1519bfc6d78f94603c08462cb /trove/guestagent
parent ff4b6a13397b6b7102cddff70098b23a2d6ab606 (diff)
download trove-dc117d8dd829c64971baefdbaca835aa5c7fb7b7.tar.gz
Set status to ERROR if heartbeat expires
Change-Id: Ib8f5062094c0ec7766d4c6c6e7c3c8168e15ebd6
Diffstat (limited to 'trove/guestagent')
-rw-r--r-- trove/guestagent/api.py 18
-rw-r--r-- trove/guestagent/datastore/manager.py 9
-rw-r--r-- trove/guestagent/datastore/mysql_common/manager.py 14
-rw-r--r-- trove/guestagent/datastore/mysql_common/service.py 41
-rw-r--r-- trove/guestagent/datastore/service.py 26
5 files changed, 71 insertions, 37 deletions
diff --git a/trove/guestagent/api.py b/trove/guestagent/api.py
index 27e626e0..67c3baa5 100644
--- a/trove/guestagent/api.py
+++ b/trove/guestagent/api.py
@@ -380,6 +380,14 @@ class API(object):
self.agent_high_timeout, version=version,
upgrade_info=upgrade_info)
+ def upgrade(self, upgrade_info):
+ """Upgrade database service."""
+ LOG.debug("Sending the call to upgrade database service.")
+ version = self.API_BASE_VERSION
+
+ return self._cast("upgrade", version=version,
+ upgrade_info=upgrade_info)
+
def restart(self):
"""Restart the database server."""
LOG.debug("Sending the call to restart the database process "
@@ -419,16 +427,6 @@ class API(object):
self._call("stop_db", self.agent_low_timeout,
version=version)
- def upgrade(self, instance_version, location, metadata=None):
- """Make an asynchronous call to self upgrade the guest agent."""
- LOG.debug("Sending an upgrade call to nova-guest.")
- version = self.API_BASE_VERSION
-
- self._cast("upgrade", version=version,
- instance_version=instance_version,
- location=location,
- metadata=metadata)
-
def get_volume_info(self):
"""Make a synchronous call to get volume info for the container."""
LOG.debug("Check Volume Info on instance %s.", self.id)
diff --git a/trove/guestagent/datastore/manager.py b/trove/guestagent/datastore/manager.py
index 0a639417..a7e126f1 100644
--- a/trove/guestagent/datastore/manager.py
+++ b/trove/guestagent/datastore/manager.py
@@ -25,7 +25,6 @@ from oslo_service import periodic_task
from trove.common import cfg
from trove.common import exception
-from trove.common import instance
from trove.common.i18n import _
from trove.common.notification import EndNotification
from trove.guestagent import dbaas
@@ -37,6 +36,7 @@ from trove.guestagent.common.operating_system import FileMode
from trove.guestagent.module import driver_manager
from trove.guestagent.module import module_manager
from trove.guestagent.strategies import replication as repl_strategy
+from trove.instance import service_status
LOG = logging.getLogger(__name__)
CONF = cfg.CONF
@@ -306,6 +306,10 @@ class Manager(periodic_task.PeriodicTasks):
"""
return {}
+ def upgrade(self, context, upgrade_info):
+ """Upgrade the database."""
+ pass
+
def post_upgrade(self, context, upgrade_info):
"""Recovers the guest after the image is upgraded using information
from the pre_upgrade step
@@ -588,7 +592,8 @@ class Manager(periodic_task.PeriodicTasks):
self.configuration_manager.apply_system_override(
config_man_values, change_id=apply_label, pre_user=True)
if restart_required:
- self.status.set_status(instance.ServiceStatuses.RESTART_REQUIRED)
+ self.status.set_status(
+ service_status.ServiceStatuses.RESTART_REQUIRED)
else:
self.apply_overrides(context, cfg_values)
diff --git a/trove/guestagent/datastore/mysql_common/manager.py b/trove/guestagent/datastore/mysql_common/manager.py
index 2d31e2f6..eeb1707a 100644
--- a/trove/guestagent/datastore/mysql_common/manager.py
+++ b/trove/guestagent/datastore/mysql_common/manager.py
@@ -22,7 +22,6 @@ from oslo_log import log as logging
from trove.common import cfg
from trove.common import configurations
from trove.common import exception
-from trove.common import instance as rd_instance
from trove.common import utils
from trove.common.notification import EndNotification
from trove.guestagent import guest_log
@@ -32,6 +31,7 @@ from trove.guestagent.datastore import manager
from trove.guestagent.strategies import replication as repl_strategy
from trove.guestagent.utils import docker as docker_util
from trove.guestagent.utils import mysql as mysql_util
+from trove.instance import service_status
LOG = logging.getLogger(__name__)
CONF = cfg.CONF
@@ -71,7 +71,7 @@ class MySqlManager(manager.Manager):
client.execute(cmd)
LOG.debug("Database service check: database query is responsive")
- return rd_instance.ServiceStatuses.HEALTHY
+ return service_status.ServiceStatuses.HEALTHY
except Exception:
return super(MySqlManager, self).get_service_status()
@@ -295,7 +295,7 @@ class MySqlManager(manager.Manager):
self.app.restore_backup(context, backup_info, restore_location)
except Exception:
LOG.error("Failed to restore from backup %s.", backup_info['id'])
- self.status.set_status(rd_instance.ServiceStatuses.FAILED)
+ self.status.set_status(service_status.ServiceStatuses.FAILED)
raise
LOG.info("Finished restore data from backup %s", backup_info['id'])
@@ -365,7 +365,7 @@ class MySqlManager(manager.Manager):
slave_config)
except Exception as err:
LOG.error("Error enabling replication, error: %s", str(err))
- self.status.set_status(rd_instance.ServiceStatuses.FAILED)
+ self.status.set_status(service_status.ServiceStatuses.FAILED)
raise
def detach_replica(self, context, for_failover=False):
@@ -431,3 +431,9 @@ class MySqlManager(manager.Manager):
def demote_replication_master(self, context):
LOG.info("Demoting replication master.")
self.replication.demote_master(self.app)
+
+ def upgrade(self, context, upgrade_info):
+ """Upgrade the database."""
+ LOG.info('Starting to upgrade database, upgrade_info: %s',
+ upgrade_info)
+ self.app.upgrade(upgrade_info)
diff --git a/trove/guestagent/datastore/mysql_common/service.py b/trove/guestagent/datastore/mysql_common/service.py
index c98b3279..77b10d81 100644
--- a/trove/guestagent/datastore/mysql_common/service.py
+++ b/trove/guestagent/datastore/mysql_common/service.py
@@ -27,7 +27,6 @@ from sqlalchemy.sql.expression import text
from trove.backup.state import BackupState
from trove.common import cfg
from trove.common import exception
-from trove.common import instance
from trove.common import utils
from trove.common.configurations import MySQLConfParser
from trove.common.db.mysql import models
@@ -43,6 +42,7 @@ from trove.guestagent.datastore import service
from trove.guestagent.datastore.mysql_common import service as commmon_service
from trove.guestagent.utils import docker as docker_util
from trove.guestagent.utils import mysql as mysql_util
+from trove.instance import service_status
LOG = logging.getLogger(__name__)
CONF = cfg.CONF
@@ -77,24 +77,24 @@ class BaseMySqlAppStatus(service.BaseDbStatus):
cmd = 'mysql -uroot -p%s -e "select 1;"' % root_pass
try:
docker_util.run_command(self.docker_client, cmd)
- return instance.ServiceStatuses.HEALTHY
+ return service_status.ServiceStatuses.HEALTHY
except Exception as exc:
LOG.warning('Failed to run docker command, error: %s',
str(exc))
container_log = docker_util.get_container_logs(
self.docker_client, tail='all')
- LOG.warning('container log: %s', '\n'.join(container_log))
- return instance.ServiceStatuses.RUNNING
+ LOG.debug('container log: \n%s', '\n'.join(container_log))
+ return service_status.ServiceStatuses.RUNNING
elif status == "not running":
- return instance.ServiceStatuses.SHUTDOWN
+ return service_status.ServiceStatuses.SHUTDOWN
elif status == "paused":
- return instance.ServiceStatuses.PAUSED
+ return service_status.ServiceStatuses.PAUSED
elif status == "exited":
- return instance.ServiceStatuses.SHUTDOWN
+ return service_status.ServiceStatuses.SHUTDOWN
elif status == "dead":
- return instance.ServiceStatuses.CRASHED
+ return service_status.ServiceStatuses.CRASHED
else:
- return instance.ServiceStatuses.UNKNOWN
+ return service_status.ServiceStatuses.UNKNOWN
@six.add_metaclass(abc.ABCMeta)
@@ -638,8 +638,9 @@ class BaseMySqlApp(object):
raise exception.TroveError(_("Failed to start mysql"))
if not self.status.wait_for_real_status_to_change_to(
- instance.ServiceStatuses.HEALTHY,
- CONF.state_change_wait_time, update_db):
+ service_status.ServiceStatuses.HEALTHY,
+ CONF.state_change_wait_time, update_db
+ ):
raise exception.TroveError(_("Failed to start mysql"))
def start_db_with_conf_changes(self, config_contents):
@@ -662,7 +663,7 @@ class BaseMySqlApp(object):
raise exception.TroveError("Failed to stop mysql")
if not self.status.wait_for_real_status_to_change_to(
- instance.ServiceStatuses.SHUTDOWN,
+ service_status.ServiceStatuses.SHUTDOWN,
CONF.state_change_wait_time, update_db):
raise exception.TroveError("Failed to stop mysql")
@@ -714,7 +715,7 @@ class BaseMySqlApp(object):
raise exception.TroveError("Failed to restart mysql")
if not self.status.wait_for_real_status_to_change_to(
- instance.ServiceStatuses.HEALTHY,
+ service_status.ServiceStatuses.HEALTHY,
CONF.state_change_wait_time, update_db=False):
raise exception.TroveError("Failed to start mysql")
@@ -949,6 +950,20 @@ class BaseMySqlApp(object):
q = "set global read_only = %s" % read_only
client.execute(text(str(q)))
+ def upgrade(self, upgrade_info):
+ """Upgrade the database."""
+ new_version = upgrade_info.get('datastore_version')
+
+ LOG.info('Stopping db container for upgrade')
+ self.stop_db()
+
+ LOG.info('Deleting db container for upgrade')
+ docker_util.remove_container(self.docker_client)
+
+ LOG.info('Starting new db container with version %s for upgrade',
+ new_version)
+ self.start_db(update_db=True, ds_version=new_version)
+
class BaseMySqlRootAccess(object):
def __init__(self, mysql_app):
diff --git a/trove/guestagent/datastore/service.py b/trove/guestagent/datastore/service.py
index 63a5464e..7ac5fb03 100644
--- a/trove/guestagent/datastore/service.py
+++ b/trove/guestagent/datastore/service.py
@@ -20,11 +20,11 @@ from oslo_utils import timeutils
from trove.common import cfg
from trove.common import context as trove_context
-from trove.common import instance
from trove.common.i18n import _
from trove.conductor import api as conductor_api
from trove.guestagent.common import guestagent_utils
from trove.guestagent.common import operating_system
+from trove.instance import service_status
LOG = logging.getLogger(__name__)
CONF = cfg.CONF
@@ -74,7 +74,7 @@ class BaseDbStatus(object):
operating_system.write_file(prepare_start_file, '')
self.__refresh_prepare_completed()
- self.set_status(instance.ServiceStatuses.BUILDING, True)
+ self.set_status(service_status.ServiceStatuses.BUILDING, True)
def set_ready(self):
prepare_end_file = guestagent_utils.build_file_path(
@@ -92,9 +92,9 @@ class BaseDbStatus(object):
final_status = None
if error_occurred:
- final_status = instance.ServiceStatuses.FAILED
+ final_status = service_status.ServiceStatuses.FAILED
elif post_processing:
- final_status = instance.ServiceStatuses.INSTANCE_READY
+ final_status = service_status.ServiceStatuses.INSTANCE_READY
if final_status:
LOG.info("Set final status to %s.", final_status)
@@ -126,8 +126,8 @@ class BaseDbStatus(object):
def is_running(self):
"""True if DB server is running."""
return (self.status is not None and
- self.status in [instance.ServiceStatuses.RUNNING,
- instance.ServiceStatuses.HEALTHY])
+ self.status in [service_status.ServiceStatuses.RUNNING,
+ service_status.ServiceStatuses.HEALTHY])
def set_status(self, status, force=False):
"""Use conductor to update the DB app status."""
@@ -199,7 +199,7 @@ class BaseDbStatus(object):
"""
LOG.debug("Waiting for database to start up.")
if not self._wait_for_database_service_status(
- instance.ServiceStatuses.RUNNING, timeout, update_db):
+ service_status.ServiceStatuses.RUNNING, timeout, update_db):
raise RuntimeError(_("Database failed to start."))
LOG.info("Database has started successfully.")
@@ -229,7 +229,7 @@ class BaseDbStatus(object):
LOG.debug("Waiting for database to shutdown.")
if not self._wait_for_database_service_status(
- instance.ServiceStatuses.SHUTDOWN, timeout, update_db):
+ service_status.ServiceStatuses.SHUTDOWN, timeout, update_db):
raise RuntimeError(_("Database failed to stop."))
LOG.info("Database has stopped successfully.")
@@ -283,9 +283,19 @@ class BaseDbStatus(object):
# outside.
loop = True
+ # We need 3 (by default) consecutive successful db connections for
+ # status 'HEALTHY'
+ healthy_count = 0
+
while loop:
self.status = self.get_actual_db_status()
if self.status == status:
+ if (status == service_status.ServiceStatuses.HEALTHY and
+ healthy_count < 2):
+ healthy_count += 1
+ time.sleep(CONF.state_change_poll_time)
+ continue
+
if update_db:
self.set_status(self.status)
return True