diff options
author | Lingxian Kong <anlin.kong@gmail.com> | 2020-09-02 10:10:23 +1200 |
---|---|---|
committer | Lingxian Kong <anlin.kong@gmail.com> | 2020-09-07 20:40:56 +1200 |
commit | 4fb41b5198c865b46a02dd72501d12e60ec10dd6 (patch) | |
tree | 663e32e8cf216201c17d1dc25201d992eb249787 /trove/guestagent | |
parent | 768ec34dfef660f133f87218a6246a9ce111bcb5 (diff) | |
download | trove-4fb41b5198c865b46a02dd72501d12e60ec10dd6.tar.gz |
Postgresql: Backup and restore
Change-Id: Icf08b7dc82ce501d82b45cf5412256a43716b6ae
Diffstat (limited to 'trove/guestagent')
-rw-r--r-- | trove/guestagent/common/operating_system.py | 44 | ||||
-rw-r--r-- | trove/guestagent/datastore/manager.py | 19 | ||||
-rw-r--r-- | trove/guestagent/datastore/mysql_common/manager.py | 33 | ||||
-rw-r--r-- | trove/guestagent/datastore/mysql_common/service.py | 104 | ||||
-rw-r--r-- | trove/guestagent/datastore/postgres/manager.py | 47 | ||||
-rw-r--r-- | trove/guestagent/datastore/postgres/query.py | 2 | ||||
-rw-r--r-- | trove/guestagent/datastore/postgres/service.py | 66 | ||||
-rw-r--r-- | trove/guestagent/datastore/service.py | 112 |
8 files changed, 264 insertions, 163 deletions
diff --git a/trove/guestagent/common/operating_system.py b/trove/guestagent/common/operating_system.py index 93eab9b2..433ed603 100644 --- a/trove/guestagent/common/operating_system.py +++ b/trove/guestagent/common/operating_system.py @@ -480,7 +480,7 @@ def service_discovery(service_candidates): return result -def _execute_shell_cmd(cmd, options, *args, **kwargs): +def execute_shell_cmd(cmd, options, *args, **kwargs): """Execute a given shell command passing it given options (flags) and arguments. @@ -519,7 +519,7 @@ def ensure_directory(dir_path, user=None, group=None, force=True, **kwargs): """Create a given directory and update its ownership (recursively) to the given user and group if any. - seealso:: _execute_shell_cmd for valid optional keyword arguments. + seealso:: execute_shell_cmd for valid optional keyword arguments. :param dir_path: Path to the created directory. :type dir_path: string @@ -549,7 +549,7 @@ def ensure_directory(dir_path, user=None, group=None, force=True, **kwargs): def chown(path, user, group, recursive=True, force=False, **kwargs): """Changes the owner and group of a given file. - seealso:: _execute_shell_cmd for valid optional keyword arguments. + seealso:: execute_shell_cmd for valid optional keyword arguments. :param path: Path to the modified file. :type path: string @@ -579,7 +579,7 @@ def chown(path, user, group, recursive=True, force=False, **kwargs): owner_group_modifier = _build_user_group_pair(user, group) options = (('f', force), ('R', recursive)) - _execute_shell_cmd('chown', options, owner_group_modifier, path, **kwargs) + execute_shell_cmd('chown', options, owner_group_modifier, path, **kwargs) def _build_user_group_pair(user, group): @@ -599,14 +599,14 @@ def _create_directory(dir_path, force=True, **kwargs): """ options = (('p', force),) - _execute_shell_cmd('mkdir', options, dir_path, **kwargs) + execute_shell_cmd('mkdir', options, dir_path, **kwargs) def chmod(path, mode, recursive=True, force=False, **kwargs): """Changes the mode of a given file. :seealso: Modes for more information on the representation of modes. - :seealso: _execute_shell_cmd for valid optional keyword arguments. + :seealso: execute_shell_cmd for valid optional keyword arguments. :param path: Path to the modified file. :type path: string @@ -629,7 +629,7 @@ def chmod(path, mode, recursive=True, force=False, **kwargs): if path: options = (('f', force), ('R', recursive)) shell_modes = _build_shell_chmod_mode(mode) - _execute_shell_cmd('chmod', options, shell_modes, path, **kwargs) + execute_shell_cmd('chmod', options, shell_modes, path, **kwargs) else: raise exception.UnprocessableEntity( _("Cannot change mode of a blank file.")) @@ -639,7 +639,7 @@ def change_user_group(user, group, append=True, add_group=True, **kwargs): """Adds a user to groups by using the usermod linux command with -a and -G options. - seealso:: _execute_shell_cmd for valid optional keyword arguments. + seealso:: execute_shell_cmd for valid optional keyword arguments. :param user: Username. :type user: string @@ -668,7 +668,7 @@ def change_user_group(user, group, append=True, add_group=True, **kwargs): raise exception.UnprocessableEntity(_("Missing group.")) options = (('a', append), ('G', add_group)) - _execute_shell_cmd('usermod', options, group, user, **kwargs) + execute_shell_cmd('usermod', options, group, user, **kwargs) def _build_shell_chmod_mode(mode): @@ -704,7 +704,7 @@ def _build_shell_chmod_mode(mode): def remove(path, force=False, recursive=True, **kwargs): """Remove a given file or directory. - :seealso: _execute_shell_cmd for valid optional keyword arguments. + :seealso: execute_shell_cmd for valid optional keyword arguments. :param path: Path to the removed file. :type path: string @@ -720,7 +720,7 @@ def remove(path, force=False, recursive=True, **kwargs): if path: options = (('f', force), ('R', recursive)) - _execute_shell_cmd('rm', options, path, **kwargs) + execute_shell_cmd('rm', options, path, **kwargs) else: raise exception.UnprocessableEntity(_("Cannot remove a blank file.")) @@ -730,7 +730,7 @@ def move(source, destination, force=False, **kwargs): Move attempts to preserve the original ownership, permissions and timestamps. - :seealso: _execute_shell_cmd for valid optional keyword arguments. + :seealso: execute_shell_cmd for valid optional keyword arguments. :param source: Path to the source location. :type source: string @@ -751,7 +751,7 @@ def move(source, destination, force=False, **kwargs): raise exception.UnprocessableEntity(_("Missing destination path.")) options = (('f', force),) - _execute_shell_cmd('mv', options, source, destination, **kwargs) + execute_shell_cmd('mv', options, source, destination, **kwargs) def copy(source, destination, force=False, preserve=False, recursive=True, @@ -761,7 +761,7 @@ def copy(source, destination, force=False, preserve=False, recursive=True, Copy does NOT attempt to preserve ownership, permissions and timestamps unless the 'preserve' option is enabled. - :seealso: _execute_shell_cmd for valid optional keyword arguments. + :seealso: execute_shell_cmd for valid optional keyword arguments. :param source: Path to the source location. :type source: string @@ -793,7 +793,7 @@ def copy(source, destination, force=False, preserve=False, recursive=True, options = (('f', force), ('p', preserve), ('R', recursive), ('L', dereference)) - _execute_shell_cmd('cp', options, source, destination, **kwargs) + execute_shell_cmd('cp', options, source, destination, **kwargs) def get_bytes_free_on_fs(path): @@ -830,7 +830,7 @@ def list_files_in_directory(root_dir, recursive=False, pattern=None, if pattern: cmd_args.extend(['-regextype', 'posix-extended', '-regex', os.path.join('.*', pattern) + '$']) - files = _execute_shell_cmd('find', [], *cmd_args, as_root=True) + files = execute_shell_cmd('find', [], *cmd_args, as_root=True) return {fp for fp in files.splitlines()} return {os.path.abspath(os.path.join(root, name)) @@ -851,7 +851,7 @@ def _build_command_options(options): def get_device(path, as_root=False): """Get the device that a given path exists on.""" - stdout = _execute_shell_cmd('df', [], path, as_root=as_root) + stdout = execute_shell_cmd('df', [], path, as_root=as_root) return stdout.splitlines()[1].split()[0] @@ -879,8 +879,8 @@ def create_user(user_name, user_id, group_name=None, group_id=None): group_id = group_id or user_id try: - _execute_shell_cmd('groupadd', [], '--gid', group_id, group_name, - as_root=True) + execute_shell_cmd('groupadd', [], '--gid', group_id, group_name, + as_root=True) except exception.ProcessExecutionError as err: if 'already exists' not in err.stderr: raise exception.UnprocessableEntity( @@ -888,8 +888,8 @@ def create_user(user_name, user_id, group_name=None, group_id=None): ) try: - _execute_shell_cmd('useradd', [], '--uid', user_id, '--gid', group_id, - '-M', user_name, as_root=True) + execute_shell_cmd('useradd', [], '--uid', user_id, '--gid', group_id, + '-M', user_name, as_root=True) except exception.ProcessExecutionError as err: if 'already exists' not in err.stderr: raise exception.UnprocessableEntity( @@ -903,4 +903,4 @@ def remove_dir_contents(folder): Use shell=True here because shell=False doesn't support '*' """ path = os.path.join(folder, '*') - _execute_shell_cmd(f'rm -rf {path}', [], shell=True, as_root=True) + execute_shell_cmd(f'rm -rf {path}', [], shell=True, as_root=True) diff --git a/trove/guestagent/datastore/manager.py b/trove/guestagent/datastore/manager.py index b6d57a05..60020f72 100644 --- a/trove/guestagent/datastore/manager.py +++ b/trove/guestagent/datastore/manager.py @@ -303,6 +303,9 @@ class Manager(periodic_task.PeriodicTasks): LOG.info('No post_prepare work has been defined.') pass + def stop_db(self, context): + self.app.stop_db() + def restart(self, context): self.app.restart() @@ -736,12 +739,20 @@ class Manager(periodic_task.PeriodicTasks): :param backup_info: a dictionary containing the db instance id of the backup task, location, type, and other data. """ - with EndNotification(context): - self.app.create_backup(context, backup_info) + pass def perform_restore(self, context, restore_location, backup_info): - raise exception.DatastoreOperationNotSupported( - operation='_perform_restore', datastore=self.manager) + LOG.info("Starting to restore database from backup %s, " + "backup_info: %s", backup_info['id'], backup_info) + + try: + self.app.restore_backup(context, backup_info, restore_location) + except Exception: + LOG.error("Failed to restore from backup %s.", backup_info['id']) + self.status.set_status(service_status.ServiceStatuses.FAILED) + raise + + LOG.info("Finished restore data from backup %s", backup_info['id']) ################ # Database and user management diff --git a/trove/guestagent/datastore/mysql_common/manager.py b/trove/guestagent/datastore/mysql_common/manager.py index 832be165..93589408 100644 --- a/trove/guestagent/datastore/mysql_common/manager.py +++ b/trove/guestagent/datastore/mysql_common/manager.py @@ -23,6 +23,7 @@ from trove.common import cfg from trove.common import configurations from trove.common import exception from trove.common import utils +from trove.common.notification import EndNotification from trove.guestagent import guest_log from trove.guestagent.common import operating_system from trove.guestagent.datastore import manager @@ -119,12 +120,25 @@ class MySqlManager(manager.Manager): # This instance is a replication slave self.attach_replica(context, snapshot, snapshot['config']) - def stop_db(self, context): - self.app.stop_db() - def start_db_with_conf_changes(self, context, config_contents, ds_version): self.app.start_db_with_conf_changes(config_contents, ds_version) + def create_backup(self, context, backup_info): + """Create backup for the database. + + :param context: User context object. + :param backup_info: a dictionary containing the db instance id of the + backup task, location, type, and other data. + """ + LOG.info(f"Creating backup {backup_info['id']}") + with EndNotification(context): + volumes_mapping = { + '/var/lib/mysql': {'bind': '/var/lib/mysql', 'mode': 'rw'} + } + self.app.create_backup(context, backup_info, + volumes_mapping=volumes_mapping, + need_dbuser=True) + def get_datastore_log_defs(self): owner = cfg.get_configuration_property('database_service_uid') datastore_dir = self.app.get_data_dir() @@ -189,19 +203,6 @@ class MySqlManager(manager.Manager): LOG.info("Applying overrides (%s).", overrides) self.app.apply_overrides(overrides) - def perform_restore(self, context, restore_location, backup_info): - LOG.info("Starting to restore database from backup %s, " - "backup_info: %s", backup_info['id'], backup_info) - - try: - self.app.restore_backup(context, backup_info, restore_location) - except Exception: - LOG.error("Failed to restore from backup %s.", backup_info['id']) - self.status.set_status(service_status.ServiceStatuses.FAILED) - raise - - LOG.info("Finished restore data from backup %s", backup_info['id']) - def reset_password_for_restore(self, ds_version=None, data_dir='/var/lib/mysql/data'): """Reset the root password after restore the db data. diff --git a/trove/guestagent/datastore/mysql_common/service.py b/trove/guestagent/datastore/mysql_common/service.py index d9b569ec..67b21454 100644 --- a/trove/guestagent/datastore/mysql_common/service.py +++ b/trove/guestagent/datastore/mysql_common/service.py @@ -17,21 +17,18 @@ import re from oslo_log import log as logging from oslo_utils import encodeutils -from oslo_utils import timeutils import six from six.moves import urllib import sqlalchemy from sqlalchemy import exc from sqlalchemy.sql.expression import text -from trove.backup.state import BackupState from trove.common import cfg from trove.common import exception from trove.common import utils from trove.common.configurations import MySQLConfParser from trove.common.db.mysql import models from trove.common.i18n import _ -from trove.conductor import api as conductor_api from trove.guestagent.common import guestagent_utils from trove.guestagent.common import operating_system from trove.guestagent.common import sql_query @@ -663,107 +660,6 @@ class BaseMySqlApp(service.BaseDbApp): LOG.info("Finished restarting mysql") - def create_backup(self, context, backup_info): - storage_driver = CONF.storage_strategy - backup_driver = cfg.get_configuration_property('backup_strategy') - incremental = '' - backup_type = 'full' - if backup_info.get('parent'): - incremental = ( - f'--incremental ' - f'--parent-location={backup_info["parent"]["location"]} ' - f'--parent-checksum={backup_info["parent"]["checksum"]}') - backup_type = 'incremental' - - backup_id = backup_info["id"] - image = cfg.get_configuration_property('backup_docker_image') - name = 'db_backup' - volumes = {'/var/lib/mysql': {'bind': '/var/lib/mysql', 'mode': 'rw'}} - admin_pass = self.get_auth_password() - user_token = context.auth_token - auth_url = CONF.service_credentials.auth_url - user_tenant = context.project_id - - swift_metadata = ( - f'datastore:{backup_info["datastore"]},' - f'datastore_version:{backup_info["datastore_version"]}' - ) - swift_container = backup_info.get('swift_container', - CONF.backup_swift_container) - swift_params = (f'--swift-extra-metadata={swift_metadata} ' - f'--swift-container {swift_container}') - - command = ( - f'/usr/bin/python3 main.py --backup --backup-id={backup_id} ' - f'--storage-driver={storage_driver} --driver={backup_driver} ' - f'--db-user=os_admin --db-password={admin_pass} ' - f'--db-host=127.0.0.1 ' - f'--os-token={user_token} --os-auth-url={auth_url} ' - f'--os-tenant-id={user_tenant} ' - f'{swift_params} ' - f'{incremental}' - ) - - # Update backup status in db - conductor = conductor_api.API(context) - mount_point = CONF.get(CONF.datastore_manager).mount_point - stats = guestagent_utils.get_filesystem_volume_stats(mount_point) - backup_state = { - 'backup_id': backup_id, - 'size': stats.get('used', 0.0), - 'state': BackupState.BUILDING, - 'backup_type': backup_type - } - conductor.update_backup(CONF.guest_id, - sent=timeutils.utcnow_ts(microsecond=True), - **backup_state) - LOG.debug("Updated state for %s to %s.", backup_id, backup_state) - - # Start to run backup inside a separate docker container - try: - LOG.info('Starting to create backup %s, command: %s', backup_id, - command) - output, ret = docker_util.run_container( - self.docker_client, image, name, - volumes=volumes, command=command) - result = output[-1] - if not ret: - msg = f'Failed to run backup container, error: {result}' - LOG.error(msg) - raise Exception(msg) - - backup_result = BACKUP_LOG.match(result) - if backup_result: - backup_state.update({ - 'checksum': backup_result.group('checksum'), - 'location': backup_result.group('location'), - 'success': True, - 'state': BackupState.COMPLETED, - }) - else: - LOG.error(f'Cannot parse backup output: {result}') - backup_state.update({ - 'success': False, - 'state': BackupState.FAILED, - }) - except Exception as err: - LOG.error("Failed to create backup %s", backup_id) - backup_state.update({ - 'success': False, - 'state': BackupState.FAILED, - }) - raise exception.TroveError( - "Failed to create backup %s, error: %s" % - (backup_id, str(err)) - ) - finally: - LOG.info("Completed backup %s.", backup_id) - conductor.update_backup(CONF.guest_id, - sent=timeutils.utcnow_ts( - microsecond=True), - **backup_state) - LOG.debug("Updated state for %s to %s.", backup_id, backup_state) - def restore_backup(self, context, backup_info, restore_location): backup_id = backup_info['id'] storage_driver = CONF.storage_strategy diff --git a/trove/guestagent/datastore/postgres/manager.py b/trove/guestagent/datastore/postgres/manager.py index 1226b785..0169deeb 100644 --- a/trove/guestagent/datastore/postgres/manager.py +++ b/trove/guestagent/datastore/postgres/manager.py @@ -16,10 +16,11 @@ import os from oslo_log import log as logging from trove.common import cfg +from trove.common.notification import EndNotification +from trove.guestagent import guest_log from trove.guestagent.common import operating_system -from trove.guestagent.datastore.postgres import service from trove.guestagent.datastore import manager -from trove.guestagent import guest_log +from trove.guestagent.datastore.postgres import service LOG = logging.getLogger(__name__) CONF = cfg.CONF @@ -45,17 +46,26 @@ class PostgresManager(manager.Manager): user=CONF.database_service_uid, group=CONF.database_service_uid, as_root=True) + operating_system.ensure_directory(service.WAL_ARCHIVE_DIR, + user=CONF.database_service_uid, + group=CONF.database_service_uid, + as_root=True) LOG.info('Preparing database config files') self.app.configuration_manager.save_configuration(config_contents) self.app.set_data_dir(self.app.datadir) self.app.update_overrides(overrides) - # # Restore data from backup and reset root password - # if backup_info: - # self.perform_restore(context, data_dir, backup_info) - # self.reset_password_for_restore(ds_version=ds_version, - # data_dir=data_dir) + # Restore data from backup and reset root password + if backup_info: + self.perform_restore(context, self.app.datadir, backup_info) + + signal_file = f"{self.app.datadir}/recovery.signal" + operating_system.execute_shell_cmd( + f"touch {signal_file}", [], shell=True, as_root=True) + operating_system.chown(signal_file, CONF.database_service_uid, + CONF.database_service_uid, force=True, + as_root=True) # config_file can only be set on the postgres command line command = f"postgres -c config_file={service.CONFIG_FILE}" @@ -101,3 +111,26 @@ class PostgresManager(manager.Manager): def is_log_enabled(self, logname): return self.configuration_manager.get_value('logging_collector', False) + + def create_backup(self, context, backup_info): + """Create backup for the database. + + :param context: User context object. + :param backup_info: a dictionary containing the db instance id of the + backup task, location, type, and other data. + """ + LOG.info(f"Creating backup {backup_info['id']}") + with EndNotification(context): + volumes_mapping = { + '/var/lib/postgresql/data': { + 'bind': '/var/lib/postgresql/data', 'mode': 'rw' + }, + "/var/run/postgresql": {"bind": "/var/run/postgresql", + "mode": "ro"}, + } + extra_params = f"--pg-wal-archive-dir {service.WAL_ARCHIVE_DIR}" + + self.app.create_backup(context, backup_info, + volumes_mapping=volumes_mapping, + need_dbuser=False, + extra_params=extra_params) diff --git a/trove/guestagent/datastore/postgres/query.py b/trove/guestagent/datastore/postgres/query.py index 96f3bc44..8634dc97 100644 --- a/trove/guestagent/datastore/postgres/query.py +++ b/trove/guestagent/datastore/postgres/query.py @@ -138,7 +138,7 @@ class UserQuery(object): @classmethod def drop(cls, name): """Query to drop a user.""" - return f'DROP USER "{name}"' + return f'DROP USER IF EXISTS "{name}"' class AccessQuery(object): diff --git a/trove/guestagent/datastore/postgres/service.py b/trove/guestagent/datastore/postgres/service.py index 589d7e18..f7bb5db3 100644 --- a/trove/guestagent/datastore/postgres/service.py +++ b/trove/guestagent/datastore/postgres/service.py @@ -39,6 +39,8 @@ CNF_EXT = 'conf' # The same with include_dir config option CNF_INCLUDE_DIR = '/etc/postgresql/conf.d' HBA_CONFIG_FILE = '/etc/postgresql/pg_hba.conf' +# The same with the path in archive_command config option. +WAL_ARCHIVE_DIR = '/var/lib/postgresql/data/wal_archive' class PgSqlAppStatus(service.BaseDbStatus): @@ -113,6 +115,8 @@ class PgSqlApp(service.BaseDbApp): admin_password = utils.generate_random_password() os_admin = models.PostgreSQLUser(ADMIN_USER_NAME, admin_password) + # Drop os_admin user if exists, this is needed for restore. + PgSqlAdmin(SUPER_USER_NAME).delete_user({'_name': ADMIN_USER_NAME}) PgSqlAdmin(SUPER_USER_NAME).create_admin_user(os_admin, encrypt_password=True) self.save_password(ADMIN_USER_NAME, admin_password) @@ -176,9 +180,9 @@ class PgSqlApp(service.BaseDbApp): command = command if command else '' try: - root_pass = self.get_auth_password(file="root.cnf") + postgres_pass = self.get_auth_password(file="postgres.cnf") except exception.UnprocessableEntity: - root_pass = utils.generate_random_password() + postgres_pass = utils.generate_random_password() # Get uid and gid user = "%s:%s" % (CONF.database_service_uid, CONF.database_service_uid) @@ -211,7 +215,7 @@ class PgSqlApp(service.BaseDbApp): network_mode="host", user=user, environment={ - "POSTGRES_PASSWORD": root_pass, + "POSTGRES_PASSWORD": postgres_pass, "PGDATA": self.datadir, }, command=command @@ -219,7 +223,7 @@ class PgSqlApp(service.BaseDbApp): # Save root password LOG.debug("Saving root credentials to local host.") - self.save_password('postgres', root_pass) + self.save_password('postgres', postgres_pass) except Exception: LOG.exception("Failed to start database service") raise exception.TroveError("Failed to start database service") @@ -254,6 +258,55 @@ class PgSqlApp(service.BaseDbApp): LOG.info("Finished restarting database") + def restore_backup(self, context, backup_info, restore_location): + backup_id = backup_info['id'] + storage_driver = CONF.storage_strategy + backup_driver = cfg.get_configuration_property('backup_strategy') + image = cfg.get_configuration_property('backup_docker_image') + name = 'db_restore' + volumes = { + '/var/lib/postgresql/data': { + 'bind': '/var/lib/postgresql/data', + 'mode': 'rw' + } + } + + os_cred = (f"--os-token={context.auth_token} " + f"--os-auth-url={CONF.service_credentials.auth_url} " + f"--os-tenant-id={context.project_id}") + + command = ( + f'/usr/bin/python3 main.py --nobackup ' + f'--storage-driver={storage_driver} --driver={backup_driver} ' + f'{os_cred} ' + f'--restore-from={backup_info["location"]} ' + f'--restore-checksum={backup_info["checksum"]} ' + f'--pg-wal-archive-dir {WAL_ARCHIVE_DIR}' + ) + + LOG.debug('Stop the database and clean up the data before restore ' + 'from %s', backup_id) + self.stop_db() + for dir in [WAL_ARCHIVE_DIR, self.datadir]: + operating_system.remove_dir_contents(dir) + + # Start to run restore inside a separate docker container + LOG.info('Starting to restore backup %s, command: %s', backup_id, + command) + output, ret = docker_util.run_container( + self.docker_client, image, name, + volumes=volumes, command=command) + result = output[-1] + if not ret: + msg = f'Failed to run restore container, error: {result}' + LOG.error(msg) + raise Exception(msg) + + for dir in [WAL_ARCHIVE_DIR, self.datadir]: + operating_system.chown(dir, CONF.database_service_uid, + CONF.database_service_uid, force=True, + as_root=True) + class PgSqlAdmin(object): # Default set of options of an administrative account. @@ -352,10 +405,7 @@ class PgSqlAdmin(object): Return a list of serialized Postgres databases. """ user = self._find_user(username) - if user is not None: - return user.databases - - raise exception.UserNotFound(username) + return user.databases if user is not None else [] def create_databases(self, databases): """Create the list of specified databases. diff --git a/trove/guestagent/datastore/service.py b/trove/guestagent/datastore/service.py index 38f4c2fa..8f4c6bd6 100644 --- a/trove/guestagent/datastore/service.py +++ b/trove/guestagent/datastore/service.py @@ -13,16 +13,18 @@ # License for the specific language governing permissions and limitations # under the License. import os +import re import time from oslo_log import log as logging from oslo_utils import timeutils +from trove.backup.state import BackupState from trove.common import cfg from trove.common import context as trove_context from trove.common import exception -from trove.common.i18n import _ from trove.common import stream_codecs +from trove.common.i18n import _ from trove.conductor import api as conductor_api from trove.guestagent.common import guestagent_utils from trove.guestagent.common import operating_system @@ -31,6 +33,8 @@ from trove.instance import service_status LOG = logging.getLogger(__name__) CONF = cfg.CONF +BACKUP_LOG_RE = re.compile(r'.*Backup successfully, checksum: ' + r'(?P<checksum>.*), location: (?P<location>.*)') class BaseDbStatus(object): @@ -401,3 +405,109 @@ class BaseDbApp(object): self.reset_configuration(config_contents) self.start_db(update_db=True, ds_version=ds_version) + + def create_backup(self, context, backup_info, volumes_mapping={}, + need_dbuser=True, extra_params=''): + storage_driver = CONF.storage_strategy + backup_driver = cfg.get_configuration_property('backup_strategy') + incremental = '' + backup_type = 'full' + if backup_info.get('parent'): + incremental = ( + f'--incremental ' + f'--parent-location={backup_info["parent"]["location"]} ' + f'--parent-checksum={backup_info["parent"]["checksum"]}') + backup_type = 'incremental' + + backup_id = backup_info["id"] + image = cfg.get_configuration_property('backup_docker_image') + name = 'db_backup' + + os_cred = (f"--os-token={context.auth_token} " + f"--os-auth-url={CONF.service_credentials.auth_url} " + f"--os-tenant-id={context.project_id}") + + db_userinfo = '' + if need_dbuser: + admin_pass = self.get_auth_password() + db_userinfo = (f"--db-host=127.0.0.1 --db-user=os_admin " + f"--db-password={admin_pass}") + + swift_metadata = ( + f'datastore:{backup_info["datastore"]},' + f'datastore_version:{backup_info["datastore_version"]}' + ) + swift_container = (backup_info.get('swift_container') or + CONF.backup_swift_container) + swift_params = (f'--swift-extra-metadata={swift_metadata} ' + f'--swift-container {swift_container}') + + command = ( + f'/usr/bin/python3 main.py --backup --backup-id={backup_id} ' + f'--storage-driver={storage_driver} --driver={backup_driver} ' + f'{os_cred} ' + f'{db_userinfo} ' + f'{swift_params} ' + f'{incremental} ' + f'{extra_params} ' + ) + + # Update backup status in db + conductor = conductor_api.API(context) + mount_point = cfg.get_configuration_property('mount_point') + stats = guestagent_utils.get_filesystem_volume_stats(mount_point) + backup_state = { + 'backup_id': backup_id, + 'size': stats.get('used', 0.0), + 'state': BackupState.BUILDING, + 'backup_type': backup_type + } + conductor.update_backup(CONF.guest_id, + sent=timeutils.utcnow_ts(microsecond=True), + **backup_state) + LOG.debug(f"Updated state for backup {backup_id} to {backup_state}") + + # Start to run backup inside a separate docker container + try: + LOG.info(f'Starting to create backup {backup_id}, ' + f'command: {command}') + output, ret = docker_util.run_container( + self.docker_client, image, name, + volumes=volumes_mapping, command=command) + result = output[-1] + if not ret: + msg = f'Failed to run backup container, error: {result}' + LOG.error(msg) + raise Exception(msg) + + backup_result = BACKUP_LOG_RE.match(result) + if backup_result: + backup_state.update({ + 'checksum': backup_result.group('checksum'), + 'location': backup_result.group('location'), + 'success': True, + 'state': BackupState.COMPLETED, + }) + else: + LOG.error(f'Cannot parse backup output: {result}') + backup_state.update({ + 'success': False, + 'state': BackupState.FAILED, + }) + except Exception as err: + LOG.error("Failed to create backup %s", backup_id) + backup_state.update({ + 'success': False, + 'state': BackupState.FAILED, + }) + raise exception.TroveError( + "Failed to create backup %s, error: %s" % + (backup_id, str(err)) + ) + finally: + LOG.info("Completed backup %s.", backup_id) + conductor.update_backup( + CONF.guest_id, + sent=timeutils.utcnow_ts(microsecond=True), + **backup_state) + LOG.debug("Updated state for %s to %s.", backup_id, backup_state) |