diff options
Diffstat (limited to 'morphlib/localrepocache.py')
-rw-r--r-- | morphlib/localrepocache.py | 237 |
1 files changed, 237 insertions, 0 deletions
# Copyright (C) 2012-2014 Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.


import logging
import os
import re
import urllib2
import urlparse
import string
import tempfile

import cliapp
import fs.osfs

import morphlib


# urlparse.urljoin needs to know details of the URL scheme being used.
# It does not know about git:// by default, so we teach it here.
gitscheme = ['git']
urlparse.uses_relative.extend(gitscheme)
urlparse.uses_netloc.extend(gitscheme)
urlparse.uses_params.extend(gitscheme)
urlparse.uses_query.extend(gitscheme)
urlparse.uses_fragment.extend(gitscheme)


def quote_url(url):
    ''' Convert URIs to strings that only contain digits, letters, % and _.

    Every character of url that is not an ASCII digit, an ASCII letter,
    '%' or '_' is replaced with '_'.

    NOTE: When changing the code of this function, make sure to also apply
    the same to the quote_url() function of lorry. Otherwise the git tarballs
    generated by lorry may no longer be found by morph.

    '''
    # string.ascii_letters is used rather than string.letters: the latter
    # is locale-dependent, which would make cache names depend on the
    # locale of the running process. For the default locale both contain
    # exactly the same characters.
    valid_chars = string.digits + string.ascii_letters + '%_'
    return ''.join([c if c in valid_chars else '_' for c in url])


class NoRemote(morphlib.Error):

    '''Raised when a repository could not be obtained from any remote.

    errors is the list of error messages collected while trying; each
    is shown on its own indented line after the summary.

    '''

    def __init__(self, reponame, errors):
        self.reponame = reponame
        self.errors = errors

    def __str__(self):
        return '\n\t'.join(['Cannot find remote git repository: %s' %
                            self.reponame] + self.errors)


class NotCached(morphlib.Error):

    '''Raised when a repository is requested that is not in the cache.'''

    def __init__(self, reponame):
        self.reponame = reponame

    def __str__(self):  # pragma: no cover
        return 'Repository %s is not cached yet' % self.reponame


class LocalRepoCache(object):

    '''Manage locally cached git repositories.

    When we build stuff, we need a local copy of the git repository.
    To avoid having to clone the repositories for every build, we
    maintain a local cache of the repositories: we first clone the
    remote repository to the cache, and then make a local clone from
    the cache to the build environment. This class manages the local
    cached repositories.

    Repositories may be specified either using a full URL, in a form
    understood by git(1), or as a repository name to which a base url
    is prepended. The base urls are given to the class when it is
    created.

    Instead of cloning via a normal 'git clone' directly from the
    git server, we first try to download a tarball from a url, and
    if that works, we unpack the tarball.

    '''

    def __init__(self, app, cachedir, resolver, tarball_base_url=None):
        '''Set up the cache.

        app is used for running commands, status output and settings;
        cachedir is the directory holding the cached repositories;
        resolver maps repository names to URLs via its pull_url();
        tarball_base_url, if given, is the URL under which repository
        tarballs are looked up before falling back to git clone.

        '''
        self._app = app
        self.fs = fs.osfs.OSFS('/')
        self._cachedir = cachedir
        self._resolver = resolver
        # Ensure a trailing slash so urljoin appends to the base URL
        # instead of replacing its last path component.
        if tarball_base_url and not tarball_base_url.endswith('/'):
            tarball_base_url += '/'  # pragma: no cover
        self._tarball_base_url = tarball_base_url
        # Memoises the objects handed out by get_repo(), keyed by reponame.
        self._cached_repo_objects = {}

    def _git(self, args, cwd=None):  # pragma: no cover
        '''Execute git command.

        This is a method of its own so that unit tests can easily override
        all use of the external git command.

        '''

        morphlib.git.gitcmd(self._app.runcmd, *args, cwd=cwd)

    def _fetch(self, url, path):  # pragma: no cover
        '''Fetch contents of url into a file.

        The url is downloaded with wget and piped into tar, which
        unpacks it in the directory path.

        This method is meant to be overridden by unit tests.

        '''
        self._app.status(msg="Trying to fetch %(tarball)s to seed the cache",
                         tarball=url, chatty=True)
        self._app.runcmd(['wget', '-q', '-O-', url],
                         ['tar', 'xf', '-'], cwd=path)

    def _mkdtemp(self, dirname):  # pragma: no cover
        '''Creates a temporary directory.

        This method is meant to be overridden by unit tests.

        '''
        return tempfile.mkdtemp(dir=dirname)

    def _escape(self, url):
        '''Escape a URL so it can be used as a basename in a file.'''

        # FIXME: The following would be a nicer way to do this.
        # However, for compatibility, we need to use the same as the
        # tarball server (set up by Lorry) uses.
        # return urllib.quote(url, safe='')

        return quote_url(url)

    def _cache_name(self, url):
        '''Return the local path at which the repository at url is kept.

        For file: URLs this is the path component of the URL itself;
        for every other scheme it is the escaped URL inside the cache
        directory.

        '''
        scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
        if scheme != 'file':
            path = os.path.join(self._cachedir, self._escape(url))
        return path

    def has_repo(self, reponame):
        '''Have we already got a cache of a given repo?'''
        url = self._resolver.pull_url(reponame)
        path = self._cache_name(url)
        return self.fs.exists(path)

    def _clone_with_tarball(self, repourl, path):
        '''Seed the cache entry at path from a tarball of repourl.

        Returns (True, None) on success and (False, error message) on
        failure. On failure any partially populated directory at path
        is removed. KeyboardInterrupt, SystemExit and other exceptions
        not derived from Exception are propagated after the same
        clean-up, so that interrupting the download does not silently
        fall back to a git clone.

        '''
        tarball_url = urlparse.urljoin(self._tarball_base_url,
                                       self._escape(repourl)) + '.tar'
        try:
            self.fs.makedir(path)
            self._fetch(tarball_url, path)
            # Make the unpacked repository behave like a mirror clone
            # of the real remote.
            self._git(['config', 'remote.origin.url', repourl], cwd=path)
            self._git(['config', 'remote.origin.mirror', 'true'], cwd=path)
            self._git(['config', 'remote.origin.fetch', '+refs/*:refs/*'],
                      cwd=path)
        except Exception as e:  # pragma: no cover
            if self.fs.exists(path):
                self.fs.removedir(path, force=True)
            return False, 'Unable to extract tarball %s: %s' % (
                tarball_url, e)
        except BaseException:  # pragma: no cover
            # Do not leave a half-unpacked repository behind, but let
            # the interrupt / exit request reach the caller.
            if self.fs.exists(path):
                self.fs.removedir(path, force=True)
            raise

        return True, None

    def cache_repo(self, reponame):
        '''Clone the given repo into the cache.

        If the repo is already cloned, do nothing.

        Returns the object representing the cached repository. Raises
        NoRemote, carrying all collected error messages, if the
        repository can be obtained neither from a tarball nor with
        git clone.

        '''
        errors = []
        if not self.fs.exists(self._cachedir):
            self.fs.makedir(self._cachedir, recursive=True)

        try:
            return self.get_repo(reponame)
        except NotCached:
            pass

        repourl = self._resolver.pull_url(reponame)
        path = self._cache_name(repourl)
        if self._tarball_base_url:
            ok, error = self._clone_with_tarball(repourl, path)
            if ok:
                return self.get_repo(reponame)
            else:
                errors.append(error)
                self._app.status(
                    msg='Failed to fetch tarball, falling back to git clone.')
        # Clone into a temporary directory first and rename afterwards,
        # so a failed clone never appears under the final cache name.
        target = self._mkdtemp(self._cachedir)
        try:
            self._git(['clone', '--mirror', '-n', repourl, target])
        except cliapp.AppException as e:
            errors.append('Unable to clone from %s to %s: %s' %
                          (repourl, target, e))
            if self.fs.exists(target):
                # NOTE(review): in PyFilesystem, recursive=True appears to
                # also prune empty parent directories (here: the cache
                # directory itself) -- confirm this is intended.
                self.fs.removedir(target, recursive=True, force=True)
            raise NoRemote(reponame, errors)

        self.fs.rename(target, path)
        return self.get_repo(reponame)

    def get_repo(self, reponame):
        '''Return an object representing a cached repository.

        Objects are created once per reponame and reused afterwards.
        Raises NotCached if the repository is not in the cache.

        '''

        if reponame in self._cached_repo_objects:
            return self._cached_repo_objects[reponame]
        else:
            repourl = self._resolver.pull_url(reponame)
            path = self._cache_name(repourl)
            if self.fs.exists(path):
                repo = morphlib.cachedrepo.CachedRepo(self._app, reponame,
                                                      repourl, path)
                self._cached_repo_objects[reponame] = repo
                return repo
        raise NotCached(reponame)

    def get_updated_repo(self, reponame):  # pragma: no cover
        '''Return object representing cached repository, which is updated.

        Unless the 'no-git-update' setting is set, the repository is
        cached if necessary and then updated; otherwise the existing
        cache entry is returned as is.

        '''

        self._app.status(msg='Updating git repository %s in cache' % reponame)
        if not self._app.settings['no-git-update']:
            cached_repo = self.cache_repo(reponame)
            cached_repo.update()
        else:
            cached_repo = self.get_repo(reponame)
        return cached_repo