summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJames E. Blair <jim@acmegating.com>2023-02-24 15:19:25 -0800
committerJames E. Blair <jim@acmegating.com>2023-03-29 17:12:13 -0700
commit7153505cd5697610d157c97576ddaa9ba9137613 (patch)
treee8368c1d8f0163c2b1cfc87aad15fba2ad0358cb
parent48ad958bb4e37e165b422daaaefb59d0ab708306 (diff)
downloadzuul-7153505cd5697610d157c97576ddaa9ba9137613.tar.gz
Fix prune-database command
This command had two problems: * It would only delete the first 50 buildsets * Depending on DB configuration, it may not have deleted anything or left orphan data. We did not tell sqlalchemy to cascade delete operations, meaning that when we deleted the buildset, we didn't delete anything else. If the database enforces foreign keys (innodb, psql) then the command would have failed. If it doesn't (myisam) then it would have deleted the buildset rows but not anything else. The tests use myisam, so they ran without error and without deleting the builds. They check that the builds are deleted, but only through the ORM via a joined load with the buildsets, and since the buildsets are gone, the builds weren't returned. To address this shortcoming, the tests now use distinct ORM methods which return objects without any joins. This would have caught the error had it been in place before. Additionally, the delet operation retained the default limit of 50 rows (set in place for the web UI), meaning that when it did run, it would only delete the most recent 50 matching builds. We now explicitly set the limit to a user-configurable batch size (by default, 10,000 builds) so that we keep transaction sizes manageable and avoid monopolizing database locks. We continue deleting buildsets in batches as long as any matching buildsets remain. This should allow users to remove very large amounts of data without affecting ongoing operations too much. Change-Id: I4c678b294eeda25589b75ab1ce7c5d0b93a07df3
-rw-r--r--releasenotes/notes/fix-prune-database-a4873bd4dead7b5f.yaml16
-rw-r--r--tests/unit/test_client.py136
-rwxr-xr-xzuul/cmd/client.py6
-rw-r--r--zuul/driver/sql/sqlconnection.py39
4 files changed, 170 insertions, 27 deletions
diff --git a/releasenotes/notes/fix-prune-database-a4873bd4dead7b5f.yaml b/releasenotes/notes/fix-prune-database-a4873bd4dead7b5f.yaml
new file mode 100644
index 000000000..6e036a754
--- /dev/null
+++ b/releasenotes/notes/fix-prune-database-a4873bd4dead7b5f.yaml
@@ -0,0 +1,16 @@
+---
+fixes:
+ - |
+ The `zuul-admin prune-database` command did not completely delete
+ expected data from the database. It may not have deleted all of
+ the buildsets older than the specified cutoff time, and it may
+ have left orphaned data in ancillary tables. This has been
+ corrected and it should now work as expected. Additionally, a
+ `--batch-size` argument has been added so that it may delete data
+ in multiple transactions which can facilitate smoother operation
+ when run while Zuul is operational.
+
+ Users who have previously run the command may need to manually
+ delete rows from the `zuul_build`, `zuul_build_event`,
+ `zuul_artifact`, and `zuul_provides` tables which do not have
+ corresponding entries in the `zuul_buildset` table.
diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py
index b51639952..fbdad451d 100644
--- a/tests/unit/test_client.py
+++ b/tests/unit/test_client.py
@@ -21,10 +21,12 @@ import time
import configparser
import datetime
import dateutil.tz
+import uuid
import fixtures
import jwt
import testtools
+import sqlalchemy
from zuul.zk import ZooKeeperClient
from zuul.cmd.client import parse_cutoff
@@ -467,27 +469,107 @@ class TestDBPruneParse(BaseTestCase):
class DBPruneTestCase(ZuulTestCase):
tenant_config_file = 'config/single-tenant/main.yaml'
+ # This should be larger than the limit size in sqlconnection
+ num_buildsets = 55
+
+ def _createBuildset(self, update_time):
+ connection = self.scheds.first.sched.sql.connection
+ buildset_uuid = uuid.uuid4().hex
+ event_id = uuid.uuid4().hex
+ with connection.getSession() as db:
+ start_time = update_time - datetime.timedelta(seconds=1)
+ end_time = update_time
+ db_buildset = db.createBuildSet(
+ uuid=buildset_uuid,
+ tenant='tenant-one',
+ pipeline='check',
+ project='org/project',
+ change='1',
+ patchset='1',
+ ref='refs/changes/1',
+ oldrev='',
+ newrev='',
+ branch='master',
+ zuul_ref='Zref',
+ ref_url='http://gerrit.example.com/1',
+ event_id=event_id,
+ event_timestamp=update_time,
+ updated=update_time,
+ first_build_start_time=start_time,
+ last_build_end_time=end_time,
+ result='SUCCESS',
+ )
+ for build_num in range(2):
+ build_uuid = uuid.uuid4().hex
+ db_build = db_buildset.createBuild(
+ uuid=build_uuid,
+ job_name=f'job{build_num}',
+ start_time=start_time,
+ end_time=end_time,
+ result='SUCCESS',
+ voting=True,
+ )
+ for art_num in range(2):
+ db_build.createArtifact(
+ name=f'artifact{art_num}',
+ url='http://example.com',
+ )
+ for provides_num in range(2):
+ db_build.createProvides(
+ name=f'item{provides_num}',
+ )
+ for event_num in range(2):
+ db_build.createBuildEvent(
+ event_type=f'event{event_num}',
+ event_time=start_time,
+ )
+
+ def _query(self, db, model):
+ table = model.__table__
+ q = db.session().query(model).order_by(table.c.id.desc())
+ try:
+ return q.all()
+ except sqlalchemy.orm.exc.NoResultFound:
+ return []
+
+ def _getBuildsets(self, db):
+ return self._query(db, db.connection.buildSetModel)
+
+ def _getBuilds(self, db):
+ return self._query(db, db.connection.buildModel)
+
+ def _getProvides(self, db):
+ return self._query(db, db.connection.providesModel)
+
+ def _getArtifacts(self, db):
+ return self._query(db, db.connection.artifactModel)
+
+ def _getBuildEvents(self, db):
+ return self._query(db, db.connection.buildEventModel)
def _setup(self):
config_file = os.path.join(self.test_root, 'zuul.conf')
with open(config_file, 'w') as f:
self.config.write(f)
- A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
- self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
- self.waitUntilSettled()
-
- time.sleep(1)
-
- B = self.fake_gerrit.addFakeChange('org/project', 'master', 'B')
- self.fake_gerrit.addEvent(B.getPatchsetCreatedEvent(1))
- self.waitUntilSettled()
+ update_time = (datetime.datetime.utcnow() -
+ datetime.timedelta(minutes=self.num_buildsets))
+ for x in range(self.num_buildsets):
+ update_time = update_time + datetime.timedelta(minutes=1)
+ self._createBuildset(update_time)
connection = self.scheds.first.sched.sql.connection
- buildsets = connection.getBuildsets()
- builds = connection.getBuilds()
- self.assertEqual(len(buildsets), 2)
- self.assertEqual(len(builds), 6)
+ with connection.getSession() as db:
+ buildsets = self._getBuildsets(db)
+ builds = self._getBuilds(db)
+ artifacts = self._getArtifacts(db)
+ provides = self._getProvides(db)
+ events = self._getBuildEvents(db)
+ self.assertEqual(len(buildsets), self.num_buildsets)
+ self.assertEqual(len(builds), 2 * self.num_buildsets)
+ self.assertEqual(len(artifacts), 4 * self.num_buildsets)
+ self.assertEqual(len(provides), 4 * self.num_buildsets)
+ self.assertEqual(len(events), 4 * self.num_buildsets)
for build in builds:
self.log.debug("Build %s %s %s",
build, build.start_time, build.end_time)
@@ -503,6 +585,7 @@ class DBPruneTestCase(ZuulTestCase):
start_time = buildsets[0].first_build_start_time
self.log.debug("Cutoff %s", start_time)
+ # Use the default batch size (omit --batch-size arg)
p = subprocess.Popen(
[os.path.join(sys.prefix, 'bin/zuul-admin'),
'-c', config_file,
@@ -513,13 +596,20 @@ class DBPruneTestCase(ZuulTestCase):
out, _ = p.communicate()
self.log.debug(out.decode('utf8'))
- buildsets = connection.getBuildsets()
- builds = connection.getBuilds()
- self.assertEqual(len(buildsets), 1)
- self.assertEqual(len(builds), 3)
+ with connection.getSession() as db:
+ buildsets = self._getBuildsets(db)
+ builds = self._getBuilds(db)
+ artifacts = self._getArtifacts(db)
+ provides = self._getProvides(db)
+ events = self._getBuildEvents(db)
for build in builds:
self.log.debug("Build %s %s %s",
build, build.start_time, build.end_time)
+ self.assertEqual(len(buildsets), 1)
+ self.assertEqual(len(builds), 2)
+ self.assertEqual(len(artifacts), 4)
+ self.assertEqual(len(provides), 4)
+ self.assertEqual(len(events), 4)
def test_db_prune_older_than(self):
# Test pruning buildsets older than a relative time
@@ -535,15 +625,23 @@ class DBPruneTestCase(ZuulTestCase):
'-c', config_file,
'prune-database',
'--older-than', '0d',
+ '--batch-size', '5',
],
stdout=subprocess.PIPE)
out, _ = p.communicate()
self.log.debug(out.decode('utf8'))
- buildsets = connection.getBuildsets()
- builds = connection.getBuilds()
+ with connection.getSession() as db:
+ buildsets = self._getBuildsets(db)
+ builds = self._getBuilds(db)
+ artifacts = self._getArtifacts(db)
+ provides = self._getProvides(db)
+ events = self._getBuildEvents(db)
self.assertEqual(len(buildsets), 0)
self.assertEqual(len(builds), 0)
+ self.assertEqual(len(artifacts), 0)
+ self.assertEqual(len(provides), 0)
+ self.assertEqual(len(events), 0)
class TestDBPruneMysql(DBPruneTestCase):
diff --git a/zuul/cmd/client.py b/zuul/cmd/client.py
index 031b10a1e..f9b8e1aed 100755
--- a/zuul/cmd/client.py
+++ b/zuul/cmd/client.py
@@ -542,6 +542,10 @@ class Client(zuul.cmd.ZuulApp):
cmd_prune_database.add_argument(
'--older-than',
help='relative time (e.g., "24h" or "180d")')
+ cmd_prune_database.add_argument(
+ '--batch-size',
+ default=10000,
+ help='transaction batch size')
cmd_prune_database.set_defaults(func=self.prune_database)
return parser
@@ -1061,7 +1065,7 @@ class Client(zuul.cmd.ZuulApp):
cutoff = parse_cutoff(now, args.before, args.older_than)
self.configure_connections(source_only=False, require_sql=True)
connection = self.connections.getSqlConnection()
- connection.deleteBuildsets(cutoff)
+ connection.deleteBuildsets(cutoff, args.batch_size)
sys.exit(0)
diff --git a/zuul/driver/sql/sqlconnection.py b/zuul/driver/sql/sqlconnection.py
index 2d5c39ec3..7a4aea626 100644
--- a/zuul/driver/sql/sqlconnection.py
+++ b/zuul/driver/sql/sqlconnection.py
@@ -247,12 +247,25 @@ class DatabaseSession(object):
except sqlalchemy.orm.exc.MultipleResultsFound:
raise Exception("Multiple buildset found with uuid %s", uuid)
- def deleteBuildsets(self, cutoff):
+ def deleteBuildsets(self, cutoff, batch_size):
"""Delete buildsets before the cutoff"""
# delete buildsets updated before the cutoff
- for buildset in self.getBuildsets(updated_max=cutoff):
- self.session().delete(buildset)
+ deleted = True
+ while deleted:
+ deleted = False
+ oldest = None
+ for buildset in self.getBuildsets(
+ updated_max=cutoff, limit=batch_size):
+ deleted = True
+ if oldest is None:
+ oldest = buildset.updated
+ else:
+ oldest = min(oldest, buildset.updated)
+ self.session().delete(buildset)
+ self.session().commit()
+ if deleted:
+ self.log.info("Deleted from %s to %s", oldest, cutoff)
class SQLConnection(BaseConnection):
@@ -409,7 +422,10 @@ class SQLConnection(BaseConnection):
final = sa.Column(sa.Boolean)
held = sa.Column(sa.Boolean)
nodeset = sa.Column(sa.String(255))
- buildset = orm.relationship(BuildSetModel, backref="builds")
+ buildset = orm.relationship(BuildSetModel,
+ backref=orm.backref(
+ "builds",
+ cascade="all, delete-orphan"))
sa.Index(self.table_prefix + 'job_name_buildset_id_idx',
job_name, buildset_id)
@@ -468,7 +484,10 @@ class SQLConnection(BaseConnection):
name = sa.Column(sa.String(255))
url = sa.Column(sa.TEXT())
meta = sa.Column('metadata', sa.TEXT())
- build = orm.relationship(BuildModel, backref="artifacts")
+ build = orm.relationship(BuildModel,
+ backref=orm.backref(
+ "artifacts",
+ cascade="all, delete-orphan"))
class ProvidesModel(Base):
__tablename__ = self.table_prefix + PROVIDES_TABLE
@@ -476,7 +495,10 @@ class SQLConnection(BaseConnection):
build_id = sa.Column(sa.Integer, sa.ForeignKey(
self.table_prefix + BUILD_TABLE + ".id"))
name = sa.Column(sa.String(255))
- build = orm.relationship(BuildModel, backref="provides")
+ build = orm.relationship(BuildModel,
+ backref=orm.backref(
+ "provides",
+ cascade="all, delete-orphan"))
class BuildEventModel(Base):
__tablename__ = self.table_prefix + BUILD_EVENTS_TABLE
@@ -486,7 +508,10 @@ class SQLConnection(BaseConnection):
event_time = sa.Column(sa.DateTime)
event_type = sa.Column(sa.String(255))
description = sa.Column(sa.TEXT())
- build = orm.relationship(BuildModel, backref="build_events")
+ build = orm.relationship(BuildModel,
+ backref=orm.backref(
+ "build_events",
+ cascade="all, delete-orphan"))
self.buildEventModel = BuildEventModel
self.zuul_build_event_table = self.buildEventModel.__table__