summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJames E. Blair <jim@acmegating.com>2023-03-13 14:48:41 -0700
committerJames E. Blair <jim@acmegating.com>2023-03-13 14:57:29 -0700
commit84c0420792a1ac6c3205b9b7282e800c1d8623d0 (patch)
treee006b69cbe0b84e45e81877f5d8381eaf67e7872
parent0d35927e2c0a010aae92b8f08997b036c318d55a (diff)
downloadzuul-84c0420792a1ac6c3205b9b7282e800c1d8623d0.tar.gz
Add statement timeouts to some web sql queries
The SQL queries are designed to be highly optimized and should return in milliseconds even with millions of rows. However, sometimes query planners are misled by certain characteristics and can end up performing suboptimally. To protect the web server in case that happens, set a statement or query timeout for the queries which list builds or buildsets. This will instruct mysql or postgresql to limit execution of the buildset or build listing queries to 30 seconds -- but only if these queries originate in zuul-web. Other users (such as the admin tools) may still run these queries without an explicit time limit (though the server may still have one). Unfortunately (or perhaps fortunately) the RDBMSs can occasionally satisfy the queries we use in testing in less than 1ms, making a functional test of this feature impractical (we are unable to set the timeout to 0ms). Change-Id: If2f01b33dc679ab7cf952a4fbf095a1f3b6e4faf
-rw-r--r--zuul/driver/sql/sqlconnection.py45
-rwxr-xr-xzuul/web/__init__.py11
2 files changed, 51 insertions, 5 deletions
diff --git a/zuul/driver/sql/sqlconnection.py b/zuul/driver/sql/sqlconnection.py
index 2d5c39ec3..360478e6b 100644
--- a/zuul/driver/sql/sqlconnection.py
+++ b/zuul/driver/sql/sqlconnection.py
@@ -13,6 +13,7 @@
# under the License.
import logging
+import re
import time
from urllib.parse import quote_plus
@@ -27,12 +28,29 @@ import sqlalchemy.pool
from zuul.connection import BaseConnection
from zuul.zk.locks import CONNECTION_LOCK_ROOT, locked, SessionAwareLock
+
BUILDSET_TABLE = 'zuul_buildset'
BUILD_TABLE = 'zuul_build'
BUILD_EVENTS_TABLE = 'zuul_build_event'
ARTIFACT_TABLE = 'zuul_artifact'
PROVIDES_TABLE = 'zuul_provides'
+STATEMENT_TIMEOUT_RE = re.compile(r'/\* statement_timeout=(\d+) \*/')
+
+
+# In Postgres we can set a per-transaction (which for us is
+# effectively per-query) execution timeout by executing "SET LOCAL
+# statement_timeout" before our query. There isn't a great way to do
+# this using the SQLAlchemy query API, so instead, we add a comment as
+# a hint, and here we parse that comment and execute the "SET". The
+# comment remains in the query sent to the server, but that's okay --
+# it may even help an operator in debugging.
+@sa.event.listens_for(sa.Engine, "before_cursor_execute")
+def _set_timeout(conn, cursor, stmt, params, context, executemany):
+ match = STATEMENT_TIMEOUT_RE.search(stmt)
+ if match:
+ cursor.execute("SET LOCAL statement_timeout=%s" % match.groups())
+
class DatabaseSession(object):
@@ -76,7 +94,7 @@ class DatabaseSession(object):
result=None, provides=None, final=None, held=None,
complete=None, sort_by_buildset=False, limit=50,
offset=0, idx_min=None, idx_max=None,
- exclude_result=None):
+ exclude_result=None, query_timeout=None):
build_table = self.connection.zuul_build_table
buildset_table = self.connection.zuul_buildset_table
@@ -104,6 +122,17 @@ class DatabaseSession(object):
if not (project or change or uuid):
q = q.with_hint(build_table, 'USE INDEX (PRIMARY)', 'mysql')
+ if query_timeout:
+ # For MySQL, we can add a query hint directly.
+ q = q.prefix_with(
+ f'/*+ MAX_EXECUTION_TIME({query_timeout}) */',
+ dialect='mysql')
+ # For Postgres, we add a comment that we parse in our
+ # event handler.
+ q = q.with_statement_hint(
+ f'/* statement_timeout={query_timeout} */',
+ dialect_name='postgresql')
+
q = self.listFilter(q, buildset_table.c.tenant, tenant)
q = self.listFilter(q, buildset_table.c.project, project)
q = self.listFilter(q, buildset_table.c.pipeline, pipeline)
@@ -185,7 +214,8 @@ class DatabaseSession(object):
change=None, branch=None, patchset=None, ref=None,
newrev=None, uuid=None, result=None, complete=None,
updated_max=None,
- limit=50, offset=0, idx_min=None, idx_max=None):
+ limit=50, offset=0, idx_min=None, idx_max=None,
+ query_timeout=None):
buildset_table = self.connection.zuul_buildset_table
@@ -194,6 +224,17 @@ class DatabaseSession(object):
if not (project or change or uuid):
q = q.with_hint(buildset_table, 'USE INDEX (PRIMARY)', 'mysql')
+ if query_timeout:
+ # For MySQL, we can add a query hint directly.
+ q = q.prefix_with(
+ f'/*+ MAX_EXECUTION_TIME({query_timeout}) */',
+ dialect='mysql')
+ # For Postgres, we add a comment that we parse in our
+ # event handler.
+ q = q.with_statement_hint(
+ f'/* statement_timeout={query_timeout} */',
+ dialect_name='postgresql')
+
q = self.listFilter(q, buildset_table.c.tenant, tenant)
q = self.listFilter(q, buildset_table.c.project, project)
q = self.listFilter(q, buildset_table.c.pipeline, pipeline)
diff --git a/zuul/web/__init__.py b/zuul/web/__init__.py
index 47226fd7d..8a520a570 100755
--- a/zuul/web/__init__.py
+++ b/zuul/web/__init__.py
@@ -463,6 +463,8 @@ class ZuulWebAPI(object):
self.cache_expiry = 1
self.static_cache_expiry = zuulweb.static_cache_expiry
+ # SQL build query timeout, in milliseconds:
+ self.query_timeout = 30000
@property
def log(self):
@@ -1454,7 +1456,8 @@ class ZuulWebAPI(object):
newrev=newrev, uuid=uuid, job_name=job_name, voting=voting,
nodeset=nodeset, result=result, final=final, held=held,
complete=complete, limit=limit, offset=skip, idx_min=_idx_min,
- idx_max=_idx_max, exclude_result=exclude_result)
+ idx_max=_idx_max, exclude_result=exclude_result,
+ query_timeout=self.query_timeout)
return [self.buildToDict(b, b.buildset) for b in builds]
@@ -1510,7 +1513,8 @@ class ZuulWebAPI(object):
buildsets = connection.getBuildsets(
tenant=tenant_name, project=project, pipeline=pipeline,
- branch=branch, complete=True, limit=1)
+ branch=branch, complete=True, limit=1,
+ query_timeout=self.query_timeout)
if not buildsets:
raise cherrypy.HTTPError(404, 'No buildset found')
@@ -1551,7 +1555,8 @@ class ZuulWebAPI(object):
tenant=tenant_name, project=project, pipeline=pipeline,
change=change, branch=branch, patchset=patchset, ref=ref,
newrev=newrev, uuid=uuid, result=result, complete=complete,
- limit=limit, offset=skip, idx_min=_idx_min, idx_max=_idx_max)
+ limit=limit, offset=skip, idx_min=_idx_min, idx_max=_idx_max,
+ query_timeout=self.query_timeout)
return [self.buildsetToDict(b) for b in buildsets]