summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZuul <zuul@review.opendev.org>2023-03-30 01:49:54 +0000
committerGerrit Code Review <review@openstack.org>2023-03-30 01:49:54 +0000
commit987fba9f28e3e4d0e6060f2ebc834c2adf654262 (patch)
treeb6c625c8418cd921b845ac005a63b3f4e393ef5e
parent8ebab861c54534efc8ee3b1de7d065f1848576bf (diff)
parent7153505cd5697610d157c97576ddaa9ba9137613 (diff)
downloadzuul-987fba9f28e3e4d0e6060f2ebc834c2adf654262.tar.gz
Merge "Fix prune-database command"
-rw-r--r--releasenotes/notes/fix-prune-database-a4873bd4dead7b5f.yaml16
-rw-r--r--tests/unit/test_client.py136
-rwxr-xr-xzuul/cmd/client.py6
-rw-r--r--zuul/driver/sql/sqlconnection.py39
4 files changed, 170 insertions, 27 deletions
diff --git a/releasenotes/notes/fix-prune-database-a4873bd4dead7b5f.yaml b/releasenotes/notes/fix-prune-database-a4873bd4dead7b5f.yaml
new file mode 100644
index 000000000..6e036a754
--- /dev/null
+++ b/releasenotes/notes/fix-prune-database-a4873bd4dead7b5f.yaml
@@ -0,0 +1,16 @@
+---
+fixes:
+ - |
+ The `zuul-admin prune-database` command did not completely delete
+ expected data from the database. It may not have deleted all of
+ the buildsets older than the specified cutoff time, and it may
+ have left orphaned data in ancillary tables. This has been
+ corrected and it should now work as expected. Additionally, a
+ `--batch-size` argument has been added so that it may delete data
+ in multiple transactions which can facilitate smoother operation
+ when run while Zuul is operational.
+
+ Users who have previously run the command may need to manually
+ delete rows from the `zuul_build`, `zuul_build_event`,
+ `zuul_artifact`, and `zuul_provides` tables which do not have
+ corresponding entries in the `zuul_buildset` table.
diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py
index f241147eb..2e90d3fb4 100644
--- a/tests/unit/test_client.py
+++ b/tests/unit/test_client.py
@@ -21,10 +21,12 @@ import time
import configparser
import datetime
import dateutil.tz
+import uuid
import fixtures
import jwt
import testtools
+import sqlalchemy
from zuul.zk import ZooKeeperClient
from zuul.zk.locks import SessionAwareLock
@@ -499,27 +501,107 @@ class TestDBPruneParse(BaseTestCase):
class DBPruneTestCase(ZuulTestCase):
tenant_config_file = 'config/single-tenant/main.yaml'
+ # This should be larger than the limit size in sqlconnection
+ num_buildsets = 55
+
+ def _createBuildset(self, update_time):
+ connection = self.scheds.first.sched.sql.connection
+ buildset_uuid = uuid.uuid4().hex
+ event_id = uuid.uuid4().hex
+ with connection.getSession() as db:
+ start_time = update_time - datetime.timedelta(seconds=1)
+ end_time = update_time
+ db_buildset = db.createBuildSet(
+ uuid=buildset_uuid,
+ tenant='tenant-one',
+ pipeline='check',
+ project='org/project',
+ change='1',
+ patchset='1',
+ ref='refs/changes/1',
+ oldrev='',
+ newrev='',
+ branch='master',
+ zuul_ref='Zref',
+ ref_url='http://gerrit.example.com/1',
+ event_id=event_id,
+ event_timestamp=update_time,
+ updated=update_time,
+ first_build_start_time=start_time,
+ last_build_end_time=end_time,
+ result='SUCCESS',
+ )
+ for build_num in range(2):
+ build_uuid = uuid.uuid4().hex
+ db_build = db_buildset.createBuild(
+ uuid=build_uuid,
+ job_name=f'job{build_num}',
+ start_time=start_time,
+ end_time=end_time,
+ result='SUCCESS',
+ voting=True,
+ )
+ for art_num in range(2):
+ db_build.createArtifact(
+ name=f'artifact{art_num}',
+ url='http://example.com',
+ )
+ for provides_num in range(2):
+ db_build.createProvides(
+ name=f'item{provides_num}',
+ )
+ for event_num in range(2):
+ db_build.createBuildEvent(
+ event_type=f'event{event_num}',
+ event_time=start_time,
+ )
+
+ def _query(self, db, model):
+ table = model.__table__
+ q = db.session().query(model).order_by(table.c.id.desc())
+ try:
+ return q.all()
+ except sqlalchemy.orm.exc.NoResultFound:
+ return []
+
+ def _getBuildsets(self, db):
+ return self._query(db, db.connection.buildSetModel)
+
+ def _getBuilds(self, db):
+ return self._query(db, db.connection.buildModel)
+
+ def _getProvides(self, db):
+ return self._query(db, db.connection.providesModel)
+
+ def _getArtifacts(self, db):
+ return self._query(db, db.connection.artifactModel)
+
+ def _getBuildEvents(self, db):
+ return self._query(db, db.connection.buildEventModel)
def _setup(self):
config_file = os.path.join(self.test_root, 'zuul.conf')
with open(config_file, 'w') as f:
self.config.write(f)
- A = self.fake_gerrit.addFakeChange('org/project', 'master', 'A')
- self.fake_gerrit.addEvent(A.getPatchsetCreatedEvent(1))
- self.waitUntilSettled()
-
- time.sleep(1)
-
- B = self.fake_gerrit.addFakeChange('org/project', 'master', 'B')
- self.fake_gerrit.addEvent(B.getPatchsetCreatedEvent(1))
- self.waitUntilSettled()
+ update_time = (datetime.datetime.utcnow() -
+ datetime.timedelta(minutes=self.num_buildsets))
+ for x in range(self.num_buildsets):
+ update_time = update_time + datetime.timedelta(minutes=1)
+ self._createBuildset(update_time)
connection = self.scheds.first.sched.sql.connection
- buildsets = connection.getBuildsets()
- builds = connection.getBuilds()
- self.assertEqual(len(buildsets), 2)
- self.assertEqual(len(builds), 6)
+ with connection.getSession() as db:
+ buildsets = self._getBuildsets(db)
+ builds = self._getBuilds(db)
+ artifacts = self._getArtifacts(db)
+ provides = self._getProvides(db)
+ events = self._getBuildEvents(db)
+ self.assertEqual(len(buildsets), self.num_buildsets)
+ self.assertEqual(len(builds), 2 * self.num_buildsets)
+ self.assertEqual(len(artifacts), 4 * self.num_buildsets)
+ self.assertEqual(len(provides), 4 * self.num_buildsets)
+ self.assertEqual(len(events), 4 * self.num_buildsets)
for build in builds:
self.log.debug("Build %s %s %s",
build, build.start_time, build.end_time)
@@ -535,6 +617,7 @@ class DBPruneTestCase(ZuulTestCase):
start_time = buildsets[0].first_build_start_time
self.log.debug("Cutoff %s", start_time)
+ # Use the default batch size (omit --batch-size arg)
p = subprocess.Popen(
[os.path.join(sys.prefix, 'bin/zuul-admin'),
'-c', config_file,
@@ -545,13 +628,20 @@ class DBPruneTestCase(ZuulTestCase):
out, _ = p.communicate()
self.log.debug(out.decode('utf8'))
- buildsets = connection.getBuildsets()
- builds = connection.getBuilds()
- self.assertEqual(len(buildsets), 1)
- self.assertEqual(len(builds), 3)
+ with connection.getSession() as db:
+ buildsets = self._getBuildsets(db)
+ builds = self._getBuilds(db)
+ artifacts = self._getArtifacts(db)
+ provides = self._getProvides(db)
+ events = self._getBuildEvents(db)
for build in builds:
self.log.debug("Build %s %s %s",
build, build.start_time, build.end_time)
+ self.assertEqual(len(buildsets), 1)
+ self.assertEqual(len(builds), 2)
+ self.assertEqual(len(artifacts), 4)
+ self.assertEqual(len(provides), 4)
+ self.assertEqual(len(events), 4)
def test_db_prune_older_than(self):
# Test pruning buildsets older than a relative time
@@ -567,15 +657,23 @@ class DBPruneTestCase(ZuulTestCase):
'-c', config_file,
'prune-database',
'--older-than', '0d',
+ '--batch-size', '5',
],
stdout=subprocess.PIPE)
out, _ = p.communicate()
self.log.debug(out.decode('utf8'))
- buildsets = connection.getBuildsets()
- builds = connection.getBuilds()
+ with connection.getSession() as db:
+ buildsets = self._getBuildsets(db)
+ builds = self._getBuilds(db)
+ artifacts = self._getArtifacts(db)
+ provides = self._getProvides(db)
+ events = self._getBuildEvents(db)
self.assertEqual(len(buildsets), 0)
self.assertEqual(len(builds), 0)
+ self.assertEqual(len(artifacts), 0)
+ self.assertEqual(len(provides), 0)
+ self.assertEqual(len(events), 0)
class TestDBPruneMysql(DBPruneTestCase):
diff --git a/zuul/cmd/client.py b/zuul/cmd/client.py
index 62e51ac3f..6fa20c7c4 100755
--- a/zuul/cmd/client.py
+++ b/zuul/cmd/client.py
@@ -540,6 +540,10 @@ class Client(zuul.cmd.ZuulApp):
cmd_prune_database.add_argument(
'--older-than',
help='relative time (e.g., "24h" or "180d")')
+ cmd_prune_database.add_argument(
+ '--batch-size',
+ default=10000,
+ help='transaction batch size')
cmd_prune_database.set_defaults(func=self.prune_database)
return parser
@@ -1049,7 +1053,7 @@ class Client(zuul.cmd.ZuulApp):
cutoff = parse_cutoff(now, args.before, args.older_than)
self.configure_connections(source_only=False, require_sql=True)
connection = self.connections.getSqlConnection()
- connection.deleteBuildsets(cutoff)
+ connection.deleteBuildsets(cutoff, args.batch_size)
sys.exit(0)
diff --git a/zuul/driver/sql/sqlconnection.py b/zuul/driver/sql/sqlconnection.py
index 2d5c39ec3..7a4aea626 100644
--- a/zuul/driver/sql/sqlconnection.py
+++ b/zuul/driver/sql/sqlconnection.py
@@ -247,12 +247,25 @@ class DatabaseSession(object):
except sqlalchemy.orm.exc.MultipleResultsFound:
raise Exception("Multiple buildset found with uuid %s", uuid)
- def deleteBuildsets(self, cutoff):
+ def deleteBuildsets(self, cutoff, batch_size):
"""Delete buildsets before the cutoff"""
# delete buildsets updated before the cutoff
- for buildset in self.getBuildsets(updated_max=cutoff):
- self.session().delete(buildset)
+ deleted = True
+ while deleted:
+ deleted = False
+ oldest = None
+ for buildset in self.getBuildsets(
+ updated_max=cutoff, limit=batch_size):
+ deleted = True
+ if oldest is None:
+ oldest = buildset.updated
+ else:
+ oldest = min(oldest, buildset.updated)
+ self.session().delete(buildset)
+ self.session().commit()
+ if deleted:
+ self.log.info("Deleted from %s to %s", oldest, cutoff)
class SQLConnection(BaseConnection):
@@ -409,7 +422,10 @@ class SQLConnection(BaseConnection):
final = sa.Column(sa.Boolean)
held = sa.Column(sa.Boolean)
nodeset = sa.Column(sa.String(255))
- buildset = orm.relationship(BuildSetModel, backref="builds")
+ buildset = orm.relationship(BuildSetModel,
+ backref=orm.backref(
+ "builds",
+ cascade="all, delete-orphan"))
sa.Index(self.table_prefix + 'job_name_buildset_id_idx',
job_name, buildset_id)
@@ -468,7 +484,10 @@ class SQLConnection(BaseConnection):
name = sa.Column(sa.String(255))
url = sa.Column(sa.TEXT())
meta = sa.Column('metadata', sa.TEXT())
- build = orm.relationship(BuildModel, backref="artifacts")
+ build = orm.relationship(BuildModel,
+ backref=orm.backref(
+ "artifacts",
+ cascade="all, delete-orphan"))
class ProvidesModel(Base):
__tablename__ = self.table_prefix + PROVIDES_TABLE
@@ -476,7 +495,10 @@ class SQLConnection(BaseConnection):
build_id = sa.Column(sa.Integer, sa.ForeignKey(
self.table_prefix + BUILD_TABLE + ".id"))
name = sa.Column(sa.String(255))
- build = orm.relationship(BuildModel, backref="provides")
+ build = orm.relationship(BuildModel,
+ backref=orm.backref(
+ "provides",
+ cascade="all, delete-orphan"))
class BuildEventModel(Base):
__tablename__ = self.table_prefix + BUILD_EVENTS_TABLE
@@ -486,7 +508,10 @@ class SQLConnection(BaseConnection):
event_time = sa.Column(sa.DateTime)
event_type = sa.Column(sa.String(255))
description = sa.Column(sa.TEXT())
- build = orm.relationship(BuildModel, backref="build_events")
+ build = orm.relationship(BuildModel,
+ backref=orm.backref(
+ "build_events",
+ cascade="all, delete-orphan"))
self.buildEventModel = BuildEventModel
self.zuul_build_event_table = self.buildEventModel.__table__