From 84c0420792a1ac6c3205b9b7282e800c1d8623d0 Mon Sep 17 00:00:00 2001 From: "James E. Blair" Date: Mon, 13 Mar 2023 14:48:41 -0700 Subject: Add statement timeouts to some web sql queries The SQL queries are designed to be highly optimized and should return in milliseconds even with millions of rows. However, sometimes query planners are misled by certain characteristics and can end up performing suboptimally. To protect the web server in case that happens, set a statement or query timeout for the queries which list builds or buildsets. This will instruct mysql or postgresql to limit execution of the buildset or build listing queries to 30 seconds -- but only if these queries originate in zuul-web. Other users (such as the admin tools) may still run these queries without an explicit time limit (though the server may still have one). Unfortunately (or perhaps fortunately) the RDBMSs can occasionally satisfy the queries we use in testing in less than 1ms, making a functional test of this feature impractical (we are unable to set the timeout to 0ms). Change-Id: If2f01b33dc679ab7cf952a4fbf095a1f3b6e4faf --- zuul/driver/sql/sqlconnection.py | 45 ++++++++++++++++++++++++++++++++++++++-- zuul/web/__init__.py | 11 +++++++--- 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/zuul/driver/sql/sqlconnection.py b/zuul/driver/sql/sqlconnection.py index 2d5c39ec3..360478e6b 100644 --- a/zuul/driver/sql/sqlconnection.py +++ b/zuul/driver/sql/sqlconnection.py @@ -13,6 +13,7 @@ # under the License. import logging +import re import time from urllib.parse import quote_plus @@ -27,12 +28,29 @@ import sqlalchemy.pool from zuul.connection import BaseConnection from zuul.zk.locks import CONNECTION_LOCK_ROOT, locked, SessionAwareLock + BUILDSET_TABLE = 'zuul_buildset' BUILD_TABLE = 'zuul_build' BUILD_EVENTS_TABLE = 'zuul_build_event' ARTIFACT_TABLE = 'zuul_artifact' PROVIDES_TABLE = 'zuul_provides' +STATEMENT_TIMEOUT_RE = re.compile(r'/\* statement_timeout=(\d+) \*/') + + +# In Postgres we can set a per-transaction (which for us is +# effectively per-query) execution timeout by executing "SET LOCAL +# statement_timeout" before our query. There isn't a great way to do +# this using the SQLAlchemy query API, so instead, we add a comment as +# a hint, and here we parse that comment and execute the "SET". The +# comment remains in the query sent to the server, but that's okay -- +# it may even help an operator in debugging. +@sa.event.listens_for(sa.Engine, "before_cursor_execute") +def _set_timeout(conn, cursor, stmt, params, context, executemany): + match = STATEMENT_TIMEOUT_RE.search(stmt) + if match: + cursor.execute("SET LOCAL statement_timeout=%s" % match.groups()) + class DatabaseSession(object): @@ -76,7 +94,7 @@ class DatabaseSession(object): result=None, provides=None, final=None, held=None, complete=None, sort_by_buildset=False, limit=50, offset=0, idx_min=None, idx_max=None, - exclude_result=None): + exclude_result=None, query_timeout=None): build_table = self.connection.zuul_build_table buildset_table = self.connection.zuul_buildset_table @@ -104,6 +122,17 @@ class DatabaseSession(object): if not (project or change or uuid): q = q.with_hint(build_table, 'USE INDEX (PRIMARY)', 'mysql') + if query_timeout: + # For MySQL, we can add a query hint directly. + q = q.prefix_with( + f'/*+ MAX_EXECUTION_TIME({query_timeout}) */', + dialect='mysql') + # For Postgres, we add a comment that we parse in our + # event handler. + q = q.with_statement_hint( + f'/* statement_timeout={query_timeout} */', + dialect_name='postgresql') + q = self.listFilter(q, buildset_table.c.tenant, tenant) q = self.listFilter(q, buildset_table.c.project, project) q = self.listFilter(q, buildset_table.c.pipeline, pipeline) @@ -185,7 +214,8 @@ class DatabaseSession(object): change=None, branch=None, patchset=None, ref=None, newrev=None, uuid=None, result=None, complete=None, updated_max=None, - limit=50, offset=0, idx_min=None, idx_max=None): + limit=50, offset=0, idx_min=None, idx_max=None, + query_timeout=None): buildset_table = self.connection.zuul_buildset_table @@ -194,6 +224,17 @@ class DatabaseSession(object): if not (project or change or uuid): q = q.with_hint(buildset_table, 'USE INDEX (PRIMARY)', 'mysql') + if query_timeout: + # For MySQL, we can add a query hint directly. + q = q.prefix_with( + f'/*+ MAX_EXECUTION_TIME({query_timeout}) */', + dialect='mysql') + # For Postgres, we add a comment that we parse in our + # event handler. + q = q.with_statement_hint( + f'/* statement_timeout={query_timeout} */', + dialect_name='postgresql') + q = self.listFilter(q, buildset_table.c.tenant, tenant) q = self.listFilter(q, buildset_table.c.project, project) q = self.listFilter(q, buildset_table.c.pipeline, pipeline) diff --git a/zuul/web/__init__.py b/zuul/web/__init__.py index 47226fd7d..8a520a570 100755 --- a/zuul/web/__init__.py +++ b/zuul/web/__init__.py @@ -463,6 +463,8 @@ class ZuulWebAPI(object): self.cache_expiry = 1 self.static_cache_expiry = zuulweb.static_cache_expiry + # SQL build query timeout, in milliseconds: + self.query_timeout = 30000 @property def log(self): @@ -1454,7 +1456,8 @@ class ZuulWebAPI(object): newrev=newrev, uuid=uuid, job_name=job_name, voting=voting, nodeset=nodeset, result=result, final=final, held=held, complete=complete, limit=limit, offset=skip, idx_min=_idx_min, - idx_max=_idx_max, exclude_result=exclude_result) + idx_max=_idx_max, exclude_result=exclude_result, + query_timeout=self.query_timeout) return [self.buildToDict(b, b.buildset) for b in builds] @@ -1510,7 +1513,8 @@ class ZuulWebAPI(object): buildsets = connection.getBuildsets( tenant=tenant_name, project=project, pipeline=pipeline, - branch=branch, complete=True, limit=1) + branch=branch, complete=True, limit=1, + query_timeout=self.query_timeout) if not buildsets: raise cherrypy.HTTPError(404, 'No buildset found') @@ -1551,7 +1555,8 @@ class ZuulWebAPI(object): tenant=tenant_name, project=project, pipeline=pipeline, change=change, branch=branch, patchset=patchset, ref=ref, newrev=newrev, uuid=uuid, result=result, complete=complete, - limit=limit, offset=skip, idx_min=_idx_min, idx_max=_idx_max) + limit=limit, offset=skip, idx_min=_idx_min, idx_max=_idx_max, + query_timeout=self.query_timeout) return [self.buildsetToDict(b) for b in buildsets] -- cgit v1.2.1