diff options
author | Dan Smith <dansmith@redhat.com> | 2016-09-27 10:17:00 -0700 |
---|---|---|
committer | Dan Smith <dansmith@redhat.com> | 2016-09-28 07:08:44 -0700 |
commit | 76d1b24c00a4dc24c9bc3290fca513b5ece7247a (patch) | |
tree | 58330530396022e1082855192bb4397437bbbdde | |
parent | dd30603f91e6fd3d1a4db452f20a51ba8820e1f4 (diff) | |
download | nova-76d1b24c00a4dc24c9bc3290fca513b5ece7247a.tar.gz |
Archive instance-related rows when the parent instance is deleted
This is something I expect has been very broken for a long time. We
have rows in tables such as instance_extra, instance_faults, etc that
pertain to a single instance, and thus have a foreign key on their
instance_uuid column that points to the instance. If any of those
records exist, an instance cannot be archived out of the main
instances table.
The archive routine currently "handles" this by skipping over said
instances, and eventually iterating over all the tables to pull out
any records that point to that instance, thus freeing up the instance
itself for archival. The problem is, this only happens if those extra
records are actually marked as deleted themselves. If we fail during
a cleanup routine and leave some of them not marked as deleted, while
the instance they reference *is* marked as deleted, we will
never archive them.
This patch adds another phase of the archival process for any table
that has an "instance_uuid" column, which attempts to archive records
that point to these deleted instances. With this, using a very large
real world sample database, I was able to archive my way down to
zero deleted, un-archivable instances (from north of 100k).
Conflicts:
nova/db/sqlalchemy/api.py (indentation change)
Closes-Bug: #1622545
Change-Id: I77255c77780f0c2b99d59a9c20adecc85335bb18
(cherry picked from commit ceaf853894352b6d0ae12efe85ba5eb4e651e58a)
-rw-r--r-- | nova/db/sqlalchemy/api.py | 59 | ||||
-rw-r--r-- | nova/tests/functional/db/test_archive.py | 42 |
2 files changed, 96 insertions, 5 deletions
diff --git a/nova/db/sqlalchemy/api.py b/nova/db/sqlalchemy/api.py index b942b5cc16..30bbb09c06 100644 --- a/nova/db/sqlalchemy/api.py +++ b/nova/db/sqlalchemy/api.py @@ -6293,6 +6293,49 @@ def task_log_end_task(context, task_name, period_beginning, period_ending, ################## +def _archive_if_instance_deleted(table, shadow_table, instances, conn, + max_rows): + """Look for records that pertain to deleted instances, but may not be + deleted themselves. This catches cases where we delete an instance, + but leave some residue because of a failure in a cleanup path or + similar. + + Logic is: if I have a column called instance_uuid, and that instance + is deleted, then I can be deleted. + """ + # NOTE(guochbo): There is a circular import, nova.db.sqlalchemy.utils + # imports nova.db.sqlalchemy.api. + from nova.db.sqlalchemy import utils as db_utils + + query_insert = shadow_table.insert(inline=True).\ + from_select( + [c.name for c in table.c], + sql.select( + [table], + and_(instances.c.deleted != instances.c.deleted.default.arg, + instances.c.uuid == table.c.instance_uuid)). + order_by(table.c.id).limit(max_rows)) + + query_delete = sql.select( + [table.c.id], + and_(instances.c.deleted != instances.c.deleted.default.arg, + instances.c.uuid == table.c.instance_uuid)).\ + order_by(table.c.id).limit(max_rows) + delete_statement = db_utils.DeleteFromSelect(table, query_delete, + table.c.id) + + try: + with conn.begin(): + conn.execute(query_insert) + result_delete = conn.execute(delete_statement) + return result_delete.rowcount + except db_exc.DBReferenceError as ex: + LOG.warning(_LW('Failed to archive %(table)s: %(error)s'), + {'table': table.__tablename__, + 'error': six.text_type(ex)}) + return 0 + + def _archive_deleted_rows_for_table(tablename, max_rows): """Move up to max_rows rows from one tables to the corresponding shadow table. 
@@ -6375,16 +6418,22 @@ def _archive_deleted_rows_for_table(tablename, max_rows): with conn.begin(): conn.execute(insert) result_delete = conn.execute(delete_statement) + rows_archived = result_delete.rowcount except db_exc.DBReferenceError as ex: # A foreign key constraint keeps us from deleting some of # these rows until we clean up a dependent table. Just # skip this table for now; we'll come back to it later. LOG.warning(_LW("IntegrityError detected when archiving table " - "%(tablename)s: %(error)s"), - {'tablename': tablename, 'error': six.text_type(ex)}) - return rows_archived - - rows_archived = result_delete.rowcount + "%(tablename)s: %(error)s"), + {'tablename': tablename, 'error': six.text_type(ex)}) + + if ((max_rows is None or rows_archived < max_rows) + and 'instance_uuid' in columns): + instances = models.BASE.metadata.tables['instances'] + limit = max_rows - rows_archived if max_rows is not None else None + extra = _archive_if_instance_deleted(table, shadow_table, instances, + conn, limit) + rows_archived += extra return rows_archived diff --git a/nova/tests/functional/db/test_archive.py b/nova/tests/functional/db/test_archive.py index 8d482c42b7..15e089c3e8 100644 --- a/nova/tests/functional/db/test_archive.py +++ b/nova/tests/functional/db/test_archive.py @@ -102,3 +102,45 @@ class TestDatabaseArchive(test_servers.ServersTestBase): # by the archive self.assertIn('instance_actions', results) self.assertIn('instance_actions_events', results) + + def test_archive_deleted_rows_with_undeleted_residue(self): + # Boots a server, deletes it, and then tries to archive it. + server = self._create_server() + server_id = server['id'] + # Assert that there are instance_actions. instance_actions are + # interesting since we don't soft delete them but they have a foreign + # key back to the instances table. 
+ actions = self.api.get_instance_actions(server_id) + self.assertTrue(len(actions), + 'No instance actions for server: %s' % server_id) + self._delete_server(server_id) + # Verify we have the soft deleted instance in the database. + admin_context = context.get_admin_context(read_deleted='yes') + # This will raise InstanceNotFound if it's not found. + instance = db.instance_get_by_uuid(admin_context, server_id) + # Make sure it's soft deleted. + self.assertNotEqual(0, instance.deleted) + # Undelete the instance_extra record to make sure we delete it anyway + extra = db.instance_extra_get_by_instance_uuid(admin_context, + instance.uuid) + self.assertNotEqual(0, extra.deleted) + db.instance_extra_update_by_uuid(admin_context, instance.uuid, + {'deleted': 0}) + extra = db.instance_extra_get_by_instance_uuid(admin_context, + instance.uuid) + self.assertEqual(0, extra.deleted) + # Verify we have some system_metadata since we'll check that later. + self.assertTrue(len(instance.system_metadata), + 'No system_metadata for instance: %s' % server_id) + # Now try and archive the soft deleted records. + results = db.archive_deleted_rows(max_rows=100) + # verify system_metadata was dropped + self.assertIn('instance_system_metadata', results) + self.assertEqual(len(instance.system_metadata), + results['instance_system_metadata']) + # Verify that instances rows are dropped + self.assertIn('instances', results) + # Verify that instance_actions and actions_event are dropped + # by the archive + self.assertIn('instance_actions', results) + self.assertIn('instance_actions_events', results) |