author     James E. Blair <jim@acmegating.com>    2023-02-27 13:45:27 -0800
committer  James E. Blair <jim@acmegating.com>    2023-03-01 13:54:46 -0800
commit     b1490b1d8e55ebca90533ccc55d8a6bef999197a (patch)
tree       4016def31b21c859148f9c79beea7c791a37fa67
parent     7a8882c642d631247f2339ac67bb3916933d754e (diff)
download   zuul-b1490b1d8e55ebca90533ccc55d8a6bef999197a.tar.gz
Avoid layout updates after delete-pipeline-state
The delete-pipeline-state command forces a layout update on every scheduler, but that isn't strictly necessary. While it may be helpful for some issues, if it really is necessary, the operator can issue a tenant reconfiguration after performing the delete-pipeline-state. In most cases, where only the state information itself is causing a problem, we can omit the layout updates and assume that the state reset alone is sufficient.

To that end, this change removes the layout state changes from the delete-pipeline-state command and instead simply empties and recreates the pipeline state and change list objects. This is very similar to what happens in the pipeline manager _postConfig call, except that in this case we hold the tenant lock, so we know we can write with impunity, and we know we are creating objects in ZK from scratch, so we use direct create calls.

We set the pipeline state's layout uuid to None, which will cause the first scheduler that comes across it (assuming its internal layout is up to date) to perform a pipeline reset (which is almost a no-op on an empty pipeline) and update the pipeline state layout to the current tenant layout state.

Change-Id: I1c503280b516ffa7bbe4cf456d9c900b500e16b0
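In outline, the reworked command body reduces to the sketch below; the authoritative version is the zuul/cmd/client.py hunk in the diff that follows. The sketch assumes an already-connected zk_client, parsed command-line args, and a logger (log), as provided by the surrounding command code:

    import urllib.parse

    from zuul.model import PipelineState, PipelineChangeList
    from zuul.zk.locks import tenant_read_lock, pipeline_lock
    from zuul.zk.zkobject import ZKContext

    safe_tenant = urllib.parse.quote_plus(args.tenant)
    safe_pipeline = urllib.parse.quote_plus(args.pipeline)
    path = f'/zuul/tenant/{safe_tenant}/pipeline/{safe_pipeline}'

    # Only a tenant *read* lock is needed now; the pipeline lock is what
    # guards the state being rewritten.
    with tenant_read_lock(zk_client, args.tenant):
        with pipeline_lock(zk_client, args.tenant, args.pipeline) as plock:
            # Drop the existing pipeline state entirely...
            zk_client.client.delete(path, recursive=True)
            with ZKContext(zk_client, plock, None, log) as context:
                # ...and recreate empty state objects with direct creates.
                # layout_uuid=None makes the first scheduler with an
                # up-to-date layout perform a pipeline reset (nearly a
                # no-op on an empty pipeline) and stamp the current
                # tenant layout uuid on the new state.
                PipelineState.new(context, _path=path, layout_uuid=None)
                PipelineChangeList.new(context)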
-rw-r--r--  tests/unit/test_client.py  78
-rwxr-xr-x  zuul/cmd/client.py         37
2 files changed, 62 insertions(+), 53 deletions(-)
diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py
index 1f2b3d220..f241147eb 100644
--- a/tests/unit/test_client.py
+++ b/tests/unit/test_client.py
@@ -27,7 +27,7 @@ import jwt
import testtools
from zuul.zk import ZooKeeperClient
-from zuul.zk.locks import pipeline_lock
+from zuul.zk.locks import SessionAwareLock
from zuul.cmd.client import parse_cutoff
from tests.base import BaseTestCase, ZuulTestCase
@@ -365,6 +365,7 @@ class TestOnlineZKOperations(ZuulTestCase):
def _test_delete_pipeline(self, pipeline):
sched = self.scheds.first.sched
+ tenant = sched.abide.tenants['tenant-one']
# Force a reconfiguration due to a config change (so that the
# tenant trigger event queue gets a minimum timestamp set)
file_dict = {'zuul.yaml': ''}
@@ -388,7 +389,11 @@ class TestOnlineZKOperations(ZuulTestCase):
# queue and stay there while we delete the pipeline state).
# This way we verify that events arrived before the deletion
# still work.
- with pipeline_lock(self.zk_client, 'tenant-one', pipeline):
+ plock = SessionAwareLock(
+ self.zk_client.client,
+ f"/zuul/locks/pipeline/{tenant.name}/{pipeline}")
+ plock.acquire(blocking=True, timeout=None)
+ try:
self.log.debug('Got pipeline lock')
# Add a new event while our old last reconfigure time is
# in place.
@@ -403,36 +408,46 @@ class TestOnlineZKOperations(ZuulTestCase):
self.log.debug('Waiting for event')
for x in iterate_timeout(30, 'trigger event queue has events'):
if sched.pipeline_trigger_events[
- 'tenant-one'][pipeline].hasEvents():
+ tenant.name][pipeline].hasEvents():
break
self.log.debug('Got event')
- # It's not necessary to grab the run lock here, but if we
- # don't the scheduler will busy-wait, so let's do it to
- # keep things tidy.
- with sched.run_handler_lock:
- self.log.debug('Got run lock')
- config_file = os.path.join(self.test_root, 'zuul.conf')
- with open(config_file, 'w') as f:
- self.config.write(f)
-
- # Make sure the pipeline exists
+ except Exception:
+ plock.release()
+ raise
+ # Grab the run handler lock so that we will continue to avoid
+ # further processing of the event after we release the
+ # pipeline lock (which the delete command needs to acquire).
+ sched.run_handler_lock.acquire()
+ try:
+ plock.release()
+ self.log.debug('Got run lock')
+ config_file = os.path.join(self.test_root, 'zuul.conf')
+ with open(config_file, 'w') as f:
+ self.config.write(f)
+
+ # Make sure the pipeline exists
+ self.getZKTree(
+ f'/zuul/tenant/{tenant.name}/pipeline/{pipeline}/item')
+ # Save the old layout uuid
+ tenant = sched.abide.tenants[tenant.name]
+ old_layout_uuid = tenant.layout.uuid
+ self.log.debug('Deleting pipeline state')
+ p = subprocess.Popen(
+ [os.path.join(sys.prefix, 'bin/zuul-admin'),
+ '-c', config_file,
+ 'delete-pipeline-state',
+ tenant.name, pipeline,
+ ],
+ stdout=subprocess.PIPE)
+ # Delete the pipeline state
+ out, _ = p.communicate()
+ self.log.debug(out.decode('utf8'))
+ # Make sure it's deleted
+ with testtools.ExpectedException(NoNodeError):
self.getZKTree(
- f'/zuul/tenant/tenant-one/pipeline/{pipeline}/item')
- self.log.debug('Deleting pipeline state')
- p = subprocess.Popen(
- [os.path.join(sys.prefix, 'bin/zuul-admin'),
- '-c', config_file,
- 'delete-pipeline-state',
- 'tenant-one', pipeline,
- ],
- stdout=subprocess.PIPE)
- # Delete the pipeline state
- out, _ = p.communicate()
- self.log.debug(out.decode('utf8'))
- # Make sure it's deleted
- with testtools.ExpectedException(NoNodeError):
- self.getZKTree(
- f'/zuul/tenant/tenant-one/pipeline/{pipeline}/item')
+ f'/zuul/tenant/{tenant.name}/pipeline/{pipeline}/item')
+ finally:
+ sched.run_handler_lock.release()
self.executor_server.hold_jobs_in_build = False
self.executor_server.release()
@@ -443,6 +458,11 @@ class TestOnlineZKOperations(ZuulTestCase):
dict(name='project-test1', result='SUCCESS', changes='3,1'),
dict(name='project-test2', result='SUCCESS', changes='3,1'),
], ordered=False)
+ tenant = sched.abide.tenants[tenant.name]
+ new_layout_uuid = tenant.layout.uuid
+ self.assertEqual(old_layout_uuid, new_layout_uuid)
+ self.assertEqual(tenant.layout.pipelines[pipeline].state.layout_uuid,
+ old_layout_uuid)
def test_delete_pipeline_check(self):
self._test_delete_pipeline('check')
diff --git a/zuul/cmd/client.py b/zuul/cmd/client.py
index 1a3738b85..62e51ac3f 100755
--- a/zuul/cmd/client.py
+++ b/zuul/cmd/client.py
@@ -30,16 +30,14 @@ import time
import textwrap
import requests
import urllib.parse
-from uuid import uuid4
import zuul.cmd
from zuul.lib.config import get_default
-from zuul.model import SystemAttributes, PipelineState
+from zuul.model import SystemAttributes, PipelineState, PipelineChangeList
from zuul.zk import ZooKeeperClient
from zuul.lib.keystorage import KeyStorage
-from zuul.zk.locks import tenant_write_lock
+from zuul.zk.locks import tenant_read_lock, pipeline_lock
from zuul.zk.zkobject import ZKContext
-from zuul.zk.layout import LayoutState, LayoutStateStore
from zuul.zk.components import COMPONENT_REGISTRY
@@ -1029,27 +1027,18 @@ class Client(zuul.cmd.ZuulApp):
safe_tenant = urllib.parse.quote_plus(args.tenant)
safe_pipeline = urllib.parse.quote_plus(args.pipeline)
COMPONENT_REGISTRY.create(zk_client)
- with tenant_write_lock(zk_client, args.tenant) as lock:
+ self.log.info('get tenant')
+ with tenant_read_lock(zk_client, args.tenant):
path = f'/zuul/tenant/{safe_tenant}/pipeline/{safe_pipeline}'
- layout_uuid = None
- zk_client.client.delete(path, recursive=True)
- with ZKContext(zk_client, lock, None, self.log) as context:
- ps = PipelineState.new(context, _path=path,
- layout_uuid=layout_uuid)
- ltime = ps._zstat.last_modified_transaction_id
- # Force everyone to make a new layout for this tenant in
- # order to rebuild the shared change queues.
- layout_state = LayoutState(
- tenant_name=args.tenant,
- hostname='admin command',
- last_reconfigured=int(time.time()),
- last_reconfigure_event_ltime=ltime,
- uuid=uuid4().hex,
- branch_cache_min_ltimes={},
- ltime=ltime,
- )
- tenant_layout_state = LayoutStateStore(zk_client, lambda: None)
- tenant_layout_state[args.tenant] = layout_state
+ self.log.info('get pipe')
+ with pipeline_lock(
+ zk_client, args.tenant, args.pipeline
+ ) as plock:
+ self.log.info('got locks')
+ zk_client.client.delete(path, recursive=True)
+ with ZKContext(zk_client, plock, None, self.log) as context:
+ PipelineState.new(context, _path=path, layout_uuid=None)
+ PipelineChangeList.new(context)
sys.exit(0)