From 755dc86c41392c7065440d147258befbbeb5c1ee Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 14 Jul 2020 22:47:00 +0100 Subject: givemejob: Stop synthesising host information when it's missing Any lorry in the database should have been either: 1. Generated based on a 'trove' or 'gitlab' section, with 'from_host' set to the Upstream Host name. 2. Read from a lorry referred to in a 'lorries' section, with 'from_host' set to an empty string. So this error case indicates a dangling reference in the database. The current schema does not use foreign keys so this could happen, but it's reasonable to just give up and return empty metadata. --- lorrycontroller/givemejob.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'lorrycontroller') diff --git a/lorrycontroller/givemejob.py b/lorrycontroller/givemejob.py index 1a0fe35..8f4a2a8 100644 --- a/lorrycontroller/givemejob.py +++ b/lorrycontroller/givemejob.py @@ -76,16 +76,9 @@ class GiveMeJob(lorrycontroller.LorryControllerRoute): try: host_info = statedb.get_host_info(lorry_info['from_host']) except lorrycontroller.HostNotFoundError: - # XXX We don't know whether upstream is Trove. It should be - # possible to set host type for single repositories. - host_info = { - 'host': lorry_info['from_host'], - 'protocol': 'ssh', - 'username': None, - 'password': None, - 'type': 'trove', - 'type_params': {}, - } + # XXX Shouldn't happen, but currently the database schema + # does not prevent it + return {} metadata = lorrycontroller.get_upstream_host(host_info) \ .get_repo_metadata(lorry_info['from_path']) -- cgit v1.2.1 From bc7f80d39b0bd8dd1484567b89c2e8801754d077 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 14 Jul 2020 23:50:58 +0100 Subject: givemejob: Move upstream host metadata lookup out of get_repo_metadata In preparation for adding metadata for single repositories, move the code for looking up metadata through the Upstream Host connector into a separate function. --- lorrycontroller/givemejob.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) (limited to 'lorrycontroller') diff --git a/lorrycontroller/givemejob.py b/lorrycontroller/givemejob.py index 8f4a2a8..721b55e 100644 --- a/lorrycontroller/givemejob.py +++ b/lorrycontroller/givemejob.py @@ -65,12 +65,9 @@ class GiveMeJob(lorrycontroller.LorryControllerRoute): due = lorry_info['last_run'] + lorry_info['interval'] return (lorry_info['running_job'] is None and due <= now) - def get_repo_metadata(self, statedb, lorry_info): - '''Get repository head and description.''' - - if not lorry_info['from_host']: - return {} - + @staticmethod + def get_upstream_host_repo_metadata(lorry_info): + assert lorry_info['from_host'] assert lorry_info['from_path'] try: @@ -79,14 +76,25 @@ class GiveMeJob(lorrycontroller.LorryControllerRoute): # XXX Shouldn't happen, but currently the database schema # does not prevent it return {} - - metadata = lorrycontroller.get_upstream_host(host_info) \ + else: + return lorrycontroller.get_upstream_host(host_info) \ .get_repo_metadata(lorry_info['from_path']) - if 'description' in metadata: + + def get_repo_metadata(self, statedb, lorry_info): + '''Get repository head and description.''' + + host_name = lorry_info['from_host'] + if host_name: + metadata = self.get_upstream_host_repo_metadata(lorry_info) + else: + metadata = {} + + if host_name and 'description' in metadata: # Prepend Upstream Host name metadata['description'] = '{host}: {desc}'.format( - host=lorry_info['from_host'], + host=host_name, desc=metadata['description']) + return metadata def give_job_to_minion(self, statedb, lorry_info, now): -- cgit v1.2.1 From 1263e62a2fafc590017b52f3e8c5ee4a94d56212 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 14 Jul 2020 23:52:14 +0100 Subject: givemejob: Add metadata for single repositories When mirroring a repository found through a 'lorries' configuration section (instead of 'trove' or 'gitlab'), we currently don't set a description or default branch. * Set the description to the upstream repository path, but allow this to be overridden by a description field in the .lorry file. Prepend the host-name, just as we do when mirroring an Upstream Host. * Set the default branch to: - Bazaar: 'trunk' - Git: upstream default branch, found using 'git ls-remote' - others: 'master' Closes #15. --- lorrycontroller/givemejob.py | 91 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 89 insertions(+), 2 deletions(-) (limited to 'lorrycontroller') diff --git a/lorrycontroller/givemejob.py b/lorrycontroller/givemejob.py index 721b55e..ee998ea 100644 --- a/lorrycontroller/givemejob.py +++ b/lorrycontroller/givemejob.py @@ -13,10 +13,13 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - +import json import logging +import re +import urllib.parse import bottle +import cliapp import lorrycontroller @@ -80,6 +83,90 @@ class GiveMeJob(lorrycontroller.LorryControllerRoute): return lorrycontroller.get_upstream_host(host_info) \ .get_repo_metadata(lorry_info['from_path']) + @staticmethod + def get_single_repo_metadata(lorry_info): + assert not lorry_info['from_host'] + + lorry_dict = json.loads(lorry_info['text']) + _, upstream_config = lorry_dict.popitem() + upstream_type = upstream_config['type'] + + # Get the repository URL + url = None + try: + url = upstream_config['url'].strip() + except KeyError: + if upstream_type == 'bzr': + try: + url = upstream_config['branches']['trunk'].strip() + except KeyError: + pass + + # Extract the host-name and repo path + host_name, repo_path = None, None + if url: + # Handle pseudo-URLs + if upstream_type == 'bzr': + if url.startswith('lp:'): + host_name = 'launchpad.net' + repo_path = url[3:] + elif upstream_type == 'cvs': + # :pserver:user@host:/path, user@host:/path, etc. + match = re.match(r'^(?::[^:@/]+:)?(?:[^:@/]+@)?([^:@/]+):/', + url) + if match: + host_name = match.group(1) + repo_path = url[match.end():].rstrip('/') + elif upstream_type == 'git': + # user@host:path, host:path. Path must not start with + # '//' as that indicates a real URL. + match = re.match(r'^(?:[^:@/]+@)?([^:@/]+):(?!//)', url) + if match: + host_name = match.group(1) + repo_path = url[match.end():].strip('/') + + # Default to parsing as a real URL + if not host_name: + try: + url_obj = urllib.parse.urlparse(url) + except ValueError: + pass + else: + host_name = url_obj.hostname + repo_path = url_obj.path.strip('/') + + metadata = {} + + # Determine the default branch + if upstream_type == 'bzr': + # Default in Bazaar is 'trunk' and we don't remap it + metadata['head'] = 'trunk' + elif upstream_type == 'git': + if url: + # Query the remote to find its default + try: + output = cliapp.runcmd(['git', 'ls-remote', '--symref', + '--', url, 'HEAD']) \ + .decode('utf-8', errors='replace') + match = re.match(r'^ref: refs/heads/([^\s]+)\tHEAD\n', + output) + if match: + metadata['head'] = match.group(1) + except cliapp.AppException: + pass + else: + # We currently produce 'master' for all other types + metadata['head'] = 'master' + + # Use description from .lorry file, or repository name + try: + metadata['description'] = upstream_config['description'] + except KeyError: + if repo_path: + metadata['description'] = repo_path + + return host_name, metadata + def get_repo_metadata(self, statedb, lorry_info): '''Get repository head and description.''' @@ -87,7 +174,7 @@ class GiveMeJob(lorrycontroller.LorryControllerRoute): if host_name: metadata = self.get_upstream_host_repo_metadata(lorry_info) else: - metadata = {} + host_name, metadata = self.get_single_repo_metadata(lorry_info) if host_name and 'description' in metadata: # Prepend Upstream Host name -- cgit v1.2.1 From 68f21f0475e83f839bd8e25de482d1aa60582edd Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Wed, 15 Jul 2020 22:29:32 +0100 Subject: gitlab: Ignore failure to set the default branch In testing the change to define the default branch for single repositories, I found that GitLab will reject a change of default branch if the branch does not yet exist. (It doesn't seem to do this when creating a repository.) Ignore failure to change the default branch, as we will fix it on the next run after the branch has been created. --- lorrycontroller/gitlab.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'lorrycontroller') diff --git a/lorrycontroller/gitlab.py b/lorrycontroller/gitlab.py index 0b5e1c2..266861c 100644 --- a/lorrycontroller/gitlab.py +++ b/lorrycontroller/gitlab.py @@ -76,13 +76,22 @@ class GitlabDownstream(hosts.DownstreamHost): else: logging.info('Project %s exists in local GitLab already.', repo_path) - if 'head' in metadata \ - and project.default_branch != metadata['head']: - project.default_branch = metadata['head'] + if 'description' in metadata \ and project.description != metadata['description']: project.description = metadata['description'] project.save() + + # This will fail if we haven't created the branch yet. + # We'll fix it next time round. + try: + if 'head' in metadata \ + and project.default_branch != metadata['head']: + project.default_branch = metadata['head'] + project.save() + except gitlab.GitlabUpdateError: + pass + return path_comps = repo_path.split('/') -- cgit v1.2.1