From 26343d90fef8e99b64db56fc21e7857e5329d957 Mon Sep 17 00:00:00 2001 From: Richard Ipsum Date: Fri, 5 Dec 2014 17:45:46 +0000 Subject: Add pip extension Conflicts: README baserockimport/app.py baserockimport/mainloop.py --- README | 3 + README.python | 58 +++++ TODO.python | 33 +++ baserockimport/app.py | 21 ++ baserockimport/exts/importer_pip_common.py | 100 ++++++++ baserockimport/exts/pip.find_deps | 352 +++++++++++++++++++++++++++ baserockimport/exts/pip.to_chunk | 23 ++ baserockimport/exts/pip.to_lorry | 224 +++++++++++++++++ baserockimport/exts/pip_find_deps_tests.py | 369 +++++++++++++++++++++++++++++ baserockimport/exts/pip_lorry_tests.py | 60 +++++ baserockimport/mainloop.py | 18 +- 11 files changed, 1255 insertions(+), 6 deletions(-) create mode 100644 README.python create mode 100644 TODO.python create mode 100644 baserockimport/exts/importer_pip_common.py create mode 100755 baserockimport/exts/pip.find_deps create mode 100755 baserockimport/exts/pip.to_chunk create mode 100755 baserockimport/exts/pip.to_lorry create mode 100755 baserockimport/exts/pip_find_deps_tests.py create mode 100755 baserockimport/exts/pip_lorry_tests.py diff --git a/README b/README index 9c4fca0..a3b0df7 100644 --- a/README +++ b/README @@ -117,3 +117,6 @@ URL in a .lorry file because someone wrote it wrong. The TODO file notes that if the .to_lorry program tried fetching the URL first and detecting if it returns a 404 'not found' error then we wouldn't hit this problem. + +For more package-system specific information, see the relevant README file, e.g +README.rubygems for RubyGem imports. diff --git a/README.python b/README.python new file mode 100644 index 0000000..84294df --- /dev/null +++ b/README.python @@ -0,0 +1,58 @@ +README +------ + +Most (nearly all) python packages use setuptools, for detailed information on +setuptools see the setuptools docs[1]. If you're not familiar with setuptools +you should read the docs[1][2] before continuing. 
+ +Please note that this tool expects any python packages to be on pypi, you +cannot currently import packages from other places. + +This import tool uses a combination of pypi metadata, +pip and setuptools commands to extract dependency information +to create a set of definitions usable with Baserock. This is not a stable +process and will not work smoothly in many cases: because setup.py +is just an ordinary Python script it's possible for a setup.py to do things that +break the import tool's means to extract dependencies, for example, some packages +bypass parts of setuptools and subclass parts of distutils's core instead. +Another problem with importing python packages is that packages are uploaded +to pypi as tarballs rather than as repositories and as a result the import tool +generates a lot of tarball lorries which is the least desirable kind of lorry +to use with Baserock. To avoid this the import tool looks through parts of the +package metadata for links to real repos, this detection is currently extremely +basic and will hopefully be improved in future to allow the tool to reduce the +number of tarball lorries it generates. Some python packages +only declare their dependency information in a human readable form within a +README, this tool cannot do anything to extract dependency +information that is not encoded in a machine readable fashion. At the time of +writing numpy is an example of such a package: running the import tool on numpy +will yield a stratum that contains numpy and none of its dependencies. + +Python packages may require other packages to be present for +build/installation to proceed, in setuptools these are called setup requirements. +Setup requirements naturally translate to Baserock build dependencies, in +practice most python packages don't have any setup requirements, so the lists +of build-depends for each chunk will generally be empty lists. 
+ +Many python packages require additional (in addition to a python interpreter) +packages to be present at runtime, in setuptools parlance these are install +requirements. The import tool uses pip to recursively extract runtime +dependency information for a given package, each dependency is added to the +same stratum as the package we're trying to import. All packages implicitly +depend on a python interpreter, the import tool encodes this by making all +strata build depend on core, which at the time of writing contains cpython. + +Traps +----- + +* Because pip executes setup.py commands to determine dependencies +and some packages' setup.py files invoke compilers, the import tool may end up +running compilers. + +* pip puts errors on stdout, some import tool errors may be vague: if it's +not clear what's going on you can check the log, if you're using +--log-level=debug then the import tool will log the output of all the commands +it executes to obtain dependency information. + +[1]: https://pythonhosted.org/setuptools/ +[2]: https://pythonhosted.org/an_example_pypi_project/setuptools.html diff --git a/TODO.python b/TODO.python new file mode 100644 index 0000000..5bed449 --- /dev/null +++ b/TODO.python @@ -0,0 +1,33 @@ +TODOs +----- + +* if homepage_url (in the pypi metadata for a given package) is a html page, +scrape the page for repo links, this should reduce the number of tarball +imports the tool does. + +* scheme x.y e.g. pip.find_deps should avoid using a '.' makes it more +difficult to import extensions as modules, consider the case where we want +to import pip.find_deps for use in a test suite. + +* prefix cmd to logs, so when we run pip, prefix log msg with 'pip', +same for egg_info etc + +* abstract popen/log, +there is a pattern of calling Popen with stderr=STDOUT and reading +from p.stdout till EOF, then waiting for the subprocess to terminate. +Since this is used in 3 places, it should be factored out really. 
+ +* error messages for constraints is currently a parsed form of the version +number e.g. ('==', ('00000000', '00000000', '00000011', '*final')) +this will be confusing, we should emit nice version numbers. + +* Can we avoid the compilation that happens during import of some packages, +i.e. nixtla + +* need better errmsg if initial package not found on pypi + +* Importing python packages that use pbr fails, see +https://bitbucket.org/pypa/setuptools/issue/73/typeerror-dist-must-be-a-distribution#comment-7267980 +The most sensible option would seem to be to make use of the sane environment +that pbr provides: just read the dependency information from the text files +that pbr projects provide, see, http://docs.openstack.org/developer/pbr/ diff --git a/baserockimport/app.py b/baserockimport/app.py index 3ebe7cb..c8f5ae6 100644 --- a/baserockimport/app.py +++ b/baserockimport/app.py @@ -82,6 +82,8 @@ class BaserockImportApplication(cliapp.Application): arg_synopsis='REPO PROJECT_NAME SOFTWARE_NAME') self.add_subcommand('rubygems', self.import_rubygems, arg_synopsis='GEM_NAME [GEM_VERSION]') + self.add_subcommand('python', self.import_python, + arg_synopsis='PACKAGE_NAME [VERSION]') self.stdout_has_colours = self._stream_has_colours(sys.stdout) @@ -183,3 +185,22 @@ class BaserockImportApplication(cliapp.Application): goal_kind='rubygems', goal_name=args[0], goal_version='master') loop.enable_importer('rubygems', strata=['strata/ruby.morph']) loop.run() + + def import_python(self, args): + '''Import one or more python packages.''' + if len(args) < 1 or len(args) > 2: + raise cliapp.AppException( + 'Please pass the name of the python package on the commandline.') + + package_name = args[0] + + # TODO: master for default for us probably doesn't make sense + package_version = args[1] if len(args) == 2 else 'master' + + # TODO: maybe rename so goal_kind is 'python' rather than 'pip' + loop = baserockimport.mainloop.ImportLoop(app=self, + goal_kind='pip', + 
goal_name=package_name, + goal_version=package_version) + loop.enable_importer('pip', strata=['strata/core.morph']) + loop.run() diff --git a/baserockimport/exts/importer_pip_common.py b/baserockimport/exts/importer_pip_common.py new file mode 100644 index 0000000..be8346d --- /dev/null +++ b/baserockimport/exts/importer_pip_common.py @@ -0,0 +1,100 @@ +from __future__ import print_function + +import sys +import logging + +from importer_base import ImportExtension + +PYPI_URL = 'http://pypi.python.org/pypi' + +# TODO: I'm guessing these things are probably standard somewhere +def warn(*args, **kwargs): + print('%s:' % sys.argv[0], *args, file=sys.stderr, **kwargs) + +def error(*args, **kwargs): + warn(*args, **kwargs) + sys.exit(1) + +def specs_satisfied(version, specs): + def mapping_error(op): + # We parse ops with requirements-parser, so any invalid user input + # should be detected there. This really guards against + # the pip developers adding some new operation to a requirement. + error("Invalid op in spec: %s" % op) + + opmap = {'==' : lambda x, y: x == y, '!=' : lambda x, y: x != y, + '<=' : lambda x, y: x <= y, '>=' : lambda x, y: x >= y, + '<': lambda x, y: x < y, '>' : lambda x, y: x > y} + + def get_op_func(op): + return opmap[op] if op in opmap else lambda x, y: mapping_error(op) + + return all([get_op_func(op)(version, sv) for (op, sv) in specs]) + +def name_or_closest(client, package_name): + '''Obtain a list of releases for a given package, + packages on pypi are case insensitive, so we need this hack + to obtain a release when our input package name doesn't + case-sensitively match the package name used on pypi''' + + # TODO: update above comment + + results = client.package_releases(package_name) + + if len(results) > 0: + logging.debug('Found package %s' % package_name) + return package_name + + logging.debug("Couldn't find exact match for %s," + "searching for a similar match" % package_name) + results = client.search({'name': package_name}) + + 
logging.debug("Got the following similarly named packages '%s': %s" + % (package_name, str([(result['name'], result['version']) + for result in results]))) + + logging.debug('Filtering for exact case-insensitive matches') + + results = [result for result in results + if result['name'].lower() == package_name.lower()] + + logging.debug('Filtered results: %s' % results) + + return results[0]['name'] if len(results) > 0 else None + +def find_releases(client, package_name): + + logging.debug('Finding releases for %s' % package_name) + results = client.package_releases(package_name) + + if len(results) > 0: + logging.debug('Found package %s' % package_name) + return results + + logging.debug("Couldn't find exact match for %s," + "searching for a similar match" % package_name) + results = client.search({'name': package_name}) + + logging.debug("Got the following similarly named packages '%s': %s" + % (package_name, str([(result['name'], result['version']) + for result in results]))) + + logging.debug('Filtering for exact case-insensitive matches') + + results = [result for result in results + if result['name'].lower() == package_name.lower()] + + logging.debug('Filtered results: %s' % results) + + return (client.package_releases(results[0]['name']) + if len(results) > 0 else []) + +# We subclass the ImportExtension to setup the logger, +# so that we can send logs to the import tool's log (morph's log in fact) +class PythonExtension(ImportExtension): + def __init__(self): + super(PythonExtension, self).__init__() + + def process_args(self, _): + import __main__ + __main__.main() diff --git a/baserockimport/exts/pip.find_deps b/baserockimport/exts/pip.find_deps new file mode 100755 index 0000000..5e52d19 --- /dev/null +++ b/baserockimport/exts/pip.find_deps @@ -0,0 +1,352 @@ +#!/usr/bin/env python +# +# Find the build and runtime dependencies for a given Python package +# +# Copyright (C) 2014 Codethink Limited +# +# This program is free software; you can redistribute it 
and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +# TODO: there is a pattern of calling Popen with stderr=STDOUT and reading +# from p.stdout till EOF, then waiting for the subprocess to terminate. +# Since this is used in 3 places, it should be factored out really. + +from __future__ import print_function + +import sys +import subprocess +import os +import json +import tempfile +import logging +import select +import signal + +import pkg_resources +import xmlrpclib + +from importer_pip_common import * + +class ConflictError(Exception): + def __init__(self, name, spec_x, spec_y): + self.name = name + self.specs = [spec_x, spec_y] + + super(ConflictError, self).__init__('%s: %s conflicts with %s' + % (name, spec_x, spec_y)) + +class UnmatchedError(Exception): + pass + +def eq_check((xop, xval), (yop, yval)): + assert xop == '==' # Assumption, '==' spec is x + + ops = (xop, yop) + vals = (xval, yval) + + # Map a pair to a function that will return true + # if the specs are in conflict. 
+ comp = {('==', '=='): lambda (x, y): x != y, # conflict if x != y + ('==', '!='): lambda (x, y): x == y, # conflict if x == y + ('==', '<'): lambda (x, y): x >= y, # conflict if x >= y + ('==', '>'): lambda (x, y): x <= y, # conflict if x <= y + ('==', '<='): lambda (x, y): x > y, # conflict if x > y + ('==', '>='): lambda (x, y): x < y, # conflict if x < y + } + + return comp[ops](vals) + +def lt_check((xop, xval), (yop, yval)): + assert xop == '<' # Assumption, '<' spec is x + + ops = (xop, yop) + vals = (xval, yval) + + # Map a pair to a function that will return true + # if the specs are in conflict. + comp = {('<', '<'): lambda (x, y): False, # < x < y cannot conflict + ('<', '>'): lambda (x, y): x <= y, # conflict if x <= y + ('<', '<='): lambda (x, y): False, # < x <= y cannot conflict + ('<', '>='): lambda (x, y): x <= y # conflict if x <= y + } + + return comp[ops](vals) + +def gt_check((xop, xval), (yop, yval)): + assert xop == '>' # Assumption, '>' spec is x + + ops = (xop, yop) + vals = (xval, yval) + + # Map a pair to a function that will return true + # if the specs are in conflict. + comp = {('>', '>'): lambda (x, y): False, # > x > y cannot conflict + ('>', '<='): lambda (x, y): x >= y, # conflict if x >= y + ('>', '>='): lambda (x, y): False, # > x >= y cannot conflict + } + + return comp[ops](vals) + +def lte_check((xop, xval), (yop, yval)): + assert xop == '<=' # Assumption, '<=' spec is x + + ops = (xop, yop) + vals = (xval, yval) + + # Map a pair to a function that will return true + # if the specs are in conflict. + comp = {('<=', '<='): lambda (x, y): False, # <= x <= y cannot conflict + ('<=', '>='): lambda (x, y): x < y + } + + return comp[ops](vals) + +def gte_check((xop, xval), (yop, yval)): + assert xop == '>=' # Assumption, '>=' spec is x + + ops = (xop, yop) + vals = (xval, yval) + + # Map a pair to a function that will return true + # if the specs are in conflict. 
+ comp = {('>=', '>='): lambda (x, y): False} # >= x >= y cannot conflict + + return comp[ops](vals) + +def reverse_if(c, t1, t2): + return [t2, t1] if c else (t1, t2) + +def conflict((xop, xval), (yop, yval)): + x, y = (xop, xval), (yop, yval) + ops = (xop, yop) + + if '==' in ops: return eq_check(*reverse_if(yop == '==', x, y)) + elif '!=' in ops: return False # != can only conflict with == + elif '<' in ops: return lt_check(*reverse_if(yop == '<', x, y)) + elif '>' in ops: return gt_check(*reverse_if(yop == '>', x, y)) + elif '<=' in ops: return lte_check(*reverse_if(yop == '<=', x, y)) + + # not reversing here, >= x >= y should be the only combination possible + # here, if it's not then something is wrong. + elif '>=' in ops: return gte_check(x, y) + + else: raise UnmatchedError('Got unmatched case (%s, %s)' % (x, y)) + +def conflict_with_set(spec, specset): + for s in specset: + if conflict(spec, s): + return s + + return None + +def resolve_specs(requirements): + requirements = list(requirements) + + logging.debug('Resolving specs from the following requirements: %s' + % requirements) + specsets = {} + + for r in requirements: + if r.project_name not in specsets: + specsets[r.project_name] = set() + + specset = specsets[r.project_name] + + for (op, version) in r.specs: + spec = (op, pkg_resources.parse_version(version)) + + c = conflict_with_set(spec, specset) + if not c: + specset.add(spec) + else: + raise ConflictError(r.project_name, c, spec) + + return specsets + +def resolve_versions(specsets): + logging.debug('Resolving versions') + versions = {} + + for (proj_name, specset) in specsets.iteritems(): + client = xmlrpclib.ServerProxy(PYPI_URL) + + # Bit of a hack to deal with pypi case insensitivity + new_proj_name = name_or_closest(client, proj_name) + if new_proj_name == None: + error("Couldn't find any project with name '%s'" % proj_name) + + logging.debug("Treating %s as %s" % (proj_name, new_proj_name)) + proj_name = new_proj_name + + releases = 
find_releases(client, proj_name) + + logging.debug('Found %d releases of %s: %s' + % (len(releases), proj_name, releases)) + + candidates = [v for v in releases + if specs_satisfied(pkg_resources.parse_version(v), specset)] + + if len(candidates) == 0: + error("Couldn't find any version of %s to satisfy: %s" + % (proj_name, specset)) + + logging.debug('Found %d releases of %s that satisfy constraints: %s' % + (len(candidates), proj_name, candidates)) + + assert proj_name not in versions + versions[proj_name] = candidates + + return versions + +def find_build_deps(source, name, version=None): + logging.debug('Finding build dependencies for %s%s at %s' + % (name, ' %s' % version if version else '', source)) + + # This amounts to running python setup.py egg_info and checking + # the resulting egg_info dir for a file called setup_requires.txt + + logging.debug('Running egg_info command') + + p = subprocess.Popen(['python', 'setup.py', 'egg_info'], cwd=source, + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + + while True: + line = p.stdout.readline() + if line == '': + break + + logging.debug(line.rstrip('\n')) + + p.wait() # even with eof, wait for termination + + if p.returncode != 0: + # Something went wrong, but in most cases we can probably still + # successfully import without knowing the setup_requires list + # because many python packages have an empty setup_requires list. 
+ logging.warning("Couldn't obtain build dependencies for %s:" + " egg_info command failed" + " (%s may be using distutils rather than setuptools)" + % (name, name)) + + egg_dir = '%s.egg-info' % name + build_deps_file = os.path.join(source, egg_dir, 'setup_requires.txt') + + build_deps = {} + + # Check whether there's a setup_requires.txt + if not os.path.isfile(build_deps_file): + build_deps = {} + else: + with open(build_deps_file) as f: + specsets = resolve_specs(pkg_resources.parse_requirements(f)) + logging.debug("Resolved specs for %s: %s" % (name, specsets)) + + versions = resolve_versions(specsets) + logging.debug('Resolved versions: %s' % versions) + + # Since any of the candidates in versions should satisfy + # all specs, we just pick the first version we see + build_deps = {name: vs[0] for (name, vs) in versions.iteritems()} + + return build_deps + +def find_runtime_deps(source, name, version=None, use_requirements_file=False): + logging.debug('Finding runtime dependencies for %s%s at %s' + % (name, ' %s' % version if version else '', source)) + + # Run our patched pip to get a list of installed deps + # Run pip install . 
--list-dependencies=instdeps.txt with cwd=source + + # Some temporary file needed for storing the requirements + tmpfd, tmppath = tempfile.mkstemp() + logging.debug('Writing install requirements to: %s', tmppath) + + args = ['pip', 'install', '.', '--list-dependencies=%s' % tmppath] + if use_requirements_file: + args.insert(args.index('.') + 1, '-r') + args.insert(args.index('.') + 2, 'requirements.txt') + + logging.debug('Running pip, args: %s' % args) + + p = subprocess.Popen(args, cwd=source, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + + while True: + line = p.stdout.readline() + if line == '': + break + + logging.debug(line.rstrip('\n')) + + p.wait() # even with eof, wait for termination + + logging.debug('pip exited with code: %d' % p.returncode) + + if p.returncode != 0: + error('failed to get runtime dependencies for %s %s at %s' + % (name, version, source)) + + with os.fdopen(tmpfd) as tmpfile: + ss = resolve_specs(pkg_resources.parse_requirements(tmpfile)) + logging.debug("Resolved specs for %s: %s" % (name, ss)) + + logging.debug("Removing root package from specs") + # filter out "root" package + specsets = {k: v for (k, v) in ss.iteritems() if k != name} + + versions = resolve_versions(specsets) + logging.debug('Resolved versions: %s' % versions) + + # Since any of the candidates in versions should satisfy + # all specs, we just pick the first version we see + runtime_deps = {name: vs[0] for (name, vs) in versions.iteritems()} + + os.remove(tmppath) + + if (len(runtime_deps) == 0 and not use_requirements_file + and os.path.isfile(os.path.join(source, 'requirements.txt'))): + logging.debug('No install requirements specified in setup.py,' + ' using requirements file') + return find_runtime_deps(source, name, version, + use_requirements_file=True) + + return runtime_deps + +def main(): + if len(sys.argv) not in [3, 4]: + print('usage: %s PACKAGE_SOURCE_DIR NAME [VERSION]' % sys.argv[0]) + sys.exit(1) + + logging.debug('%s: sys.argv[1:]: %s' % 
(sys.argv[0], sys.argv[1:])) + source, name = sys.argv[1:3] + version = sys.argv[3] if len(sys.argv) == 4 else None + + client = xmlrpclib.ServerProxy(PYPI_URL) + new_name = name_or_closest(client, name) + + if new_name == None: + error("Couldn't find any project with name '%s'" % name) + + logging.debug('Treating %s as %s' % (name, new_name)) + name = new_name + + deps = {} + deps['build-dependencies'] = find_build_deps(source, name, version) + deps['runtime-dependencies'] = find_runtime_deps(source, name, version) + + root = {'pip': deps} + + print(json.dumps(root)) + +if __name__ == '__main__': + PythonExtension().run() diff --git a/baserockimport/exts/pip.to_chunk b/baserockimport/exts/pip.to_chunk new file mode 100755 index 0000000..99fda7c --- /dev/null +++ b/baserockimport/exts/pip.to_chunk @@ -0,0 +1,23 @@ +#!/usr/bin/env python + +# We can get rid of this nonsense once we modify the import tool + +from __future__ import print_function + +import sys + +if len(sys.argv) not in [3, 4]: + # TODO: we don't need the package_source_dir for anything + # but the import tool expects us to need it + # we also don't need the version, + # again the import tool expects us to need it + print('usage: %s package_source_dir chunk_name [version]' % sys.argv[0], + file=sys.stderr) + sys.exit(1) + +print('''name: %s +kind: chunk +build-commands: +- python setup.py build +install-commands: +- python setup.py install --prefix=/usr --root "$DESTDIR"''' % sys.argv[2]) diff --git a/baserockimport/exts/pip.to_lorry b/baserockimport/exts/pip.to_lorry new file mode 100755 index 0000000..451c9a7 --- /dev/null +++ b/baserockimport/exts/pip.to_lorry @@ -0,0 +1,224 @@ +#!/usr/bin/env python +# +# Create a Baserock .lorry file for a given Python package +# +# Copyright (C) 2014 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the 
License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +from __future__ import print_function + +import subprocess +import requests +import json +import sys +import shutil +import tempfile +import xmlrpclib +import logging +import select + +import pkg_resources + +from importer_pip_common import * + +def fetch_package_metadata(package_name): + try: + result = requests.get('%s/%s/json' % (PYPI_URL, package_name)) + + # raise exception if status code is not 200 OK + result.raise_for_status() + except Exception as e: + error("Couldn't fetch package metadata:", e) + + return result.json() + +def find_repo_type(url): + + # Don't bother with detection if we can't get a 200 OK + logging.debug("Getting '%s' ..." 
% url) + + status_code = requests.get(url).status_code + if status_code != 200: + logging.debug('Got %d status code from %s, aborting repo detection' + % (status_code, url)) + return None + + logging.debug('200 OK for %s' % url) + logging.debug('Finding repo type for %s' % url) + + vcss = [('git', 'clone'), ('hg', 'clone'), + ('svn', 'checkout'), ('bzr', 'branch')] + + for (vcs, vcs_command) in vcss: + logging.debug('Trying %s %s' % (vcs, vcs_command)) + tempdir = tempfile.mkdtemp() + + p = subprocess.Popen([vcs, vcs_command, url], stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, stdin=subprocess.PIPE, + cwd=tempdir) + + # We close stdin on parent side to prevent the child from blocking + # if it reads on stdin + p.stdin.close() + + while True: + line = p.stdout.readline() + if line == '': + break + + logging.debug(line.rstrip('\n')) + + p.wait() # even with eof on both streams, we still wait + + shutil.rmtree(tempdir) + + if p.returncode == 0: + logging.debug('%s is a %s repo' % (url, vcs)) + return vcs + + logging.debug("%s doesn't seem to be a repo" % url) + + return None + +def get_compression(url): + bzip = 'bzip2' + gzip = 'gzip' + lzma = 'lzma' + + m = {'tar.gz': gzip, 'tgz': gzip, 'tar.Z': gzip, + 'tar.bz2': bzip, 'tbz2': bzip, + 'tar.lzma': lzma, 'tar.xz': lzma, 'tlz': lzma, 'txz': lzma} + + for x in [1, 2]: + ext = '.'.join(url.split('.')[-x:]) + if ext in m: return m[ext] + + return None + +# Assumption: url passed to this function must have a 'standard' tar extension +def make_tarball_lorry(package_name, url): + # TODO: this prefix probably shouldn't be hardcoded here either + name = 'python-packages/%s' % package_name.lower() + + lorry = {'type': 'tarball', 'url': url} + compression = get_compression(url) + if compression: + lorry['compression'] = compression + + return json.dumps({name + "-tarball": lorry}, indent=4, sort_keys=True) + +def filter_urls(urls): + allowed_extensions = ['tar.gz', 'tgz', 'tar.Z', 'tar.bz2', 'tbz2', + 'tar.lzma', 
'tar.xz', 'tlz', 'txz', 'tar'] + + def allowed_extension(url): + return ('.'.join(url['url'].split('.')[-2:]) in allowed_extensions + or url['url'].split('.')[-1:] in allowed_extensions) + + return filter(allowed_extension, urls) + +def get_releases(client, requirement): + try: + #releases = find_releases(client, requirement.project_name) + releases = client.package_releases(requirement.project_name) + except Exception as e: + error("Couldn't fetch release data:", e) + + return releases + +def generate_tarball_lorry(client, requirement): + releases = get_releases(client, requirement) + + if len(releases) == 0: + error("Couldn't find any releases for package %s" + % requirement.project_name) + + releases = [v for v in releases if specs_satisfied(v, requirement.specs)] + + if len(releases) == 0: + error("Couldn't find any releases of %s" + " that satisfy version constraints: %s" + % (requirement.project_name, requirement.specs)) + + release_version = releases[0] + + logging.debug('Fetching urls for package %s with version %s' + % (requirement.project_name, release_version)) + + try: + # Get a list of dicts, the dicts contain the urls. 
+ urls = client.release_urls(requirement.project_name, release_version) + except Exception as e: + error("Couldn't fetch release urls:", e) + + tarball_urls = filter_urls(urls) + + if len(tarball_urls) > 0: + urls = tarball_urls + elif len(urls) > 0: + warn("None of these urls look like tarballs:") + for url in urls: + warn("\t%s" % url['url']) + error("Cannot proceed") + else: + error("Couldn't find any download urls for package %s" + % requirement.project_name) + + url = urls[0]['url'] + + return make_tarball_lorry(requirement.project_name, url) + +def str_repo_lorry(package_name, repo_type, url): + # TODO: this prefix probably shouldn't be hardcoded here + name = 'python-packages/%s' % package_name.lower() + + return json.dumps({name: {'type': repo_type, 'url': url}}, + indent=4, sort_keys=True) + +def main(): + if len(sys.argv) != 2: + # TODO explain the format of python requirements + # warn the user that they probably want to quote their arg + # > < will be interpreted as redirection by the shell + print('usage: %s requirement' % sys.argv[0], file=sys.stderr) + sys.exit(1) + + client = xmlrpclib.ServerProxy(PYPI_URL) + + # TODO: We could take multiple reqs easily enough + req = pkg_resources.parse_requirements(sys.argv[1]).next() + + # This is a bit of a hack to handle pypi package's case insensitivity + # TODO: do not overwrite the original name, create a new attribute + # hurray for this dynamic language! 
>.> + new_proj_name = name_or_closest(client, req.project_name) + + if new_proj_name == None: + error("Couldn't find any project with name '%s'" % req.project_name) + + logging.debug('Treating %s as %s' % (req.project_name, new_proj_name)) + req.project_name = new_proj_name + + metadata = fetch_package_metadata(req.project_name) + info = metadata['info'] + + repo_type = (find_repo_type(info['home_page']) + if 'home_page' in info else None) + + print(str_repo_lorry(req.project_name, repo_type, info['home_page']) + if repo_type else generate_tarball_lorry(client, req)) + +if __name__ == '__main__': + PythonExtension().run() diff --git a/baserockimport/exts/pip_find_deps_tests.py b/baserockimport/exts/pip_find_deps_tests.py new file mode 100755 index 0000000..c185d2a --- /dev/null +++ b/baserockimport/exts/pip_find_deps_tests.py @@ -0,0 +1,369 @@ +#!/usr/bin/env python +# Copyright (C) 2014 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ +import unittest +import random + +# import pip_find_deps +# Again, can't do this +# +# Hack follows +import imp +pip_find_deps = imp.load_source('pip_find_deps', 'pip.find_deps') + +from pkg_resources import parse_requirements, parse_version + +def reverse(xs): + return xs[::-1] + +# TODO: get rid of parse_version from every list +class ConflictDetectionTests(unittest.TestCase): + + def setUp(self): + reqs = ['a == 0.1', 'a == 0.2'] + self.test_requirements = parse_requirements(reqs) + + def run_conflict_test(self, requirements, expected_conflicts): + names = set([r.project_name for r in requirements]) + + with self.assertRaises(pip_find_deps.ConflictError) as cm: + pip_find_deps.resolve_specs(requirements) + + for name in names: + _exps = [(op, parse_version(v)) for (op, v) + in expected_conflicts[name]] + + self.assertEqual(cm.exception.specs, _exps) + + def run_conflict_test_reversed(self, requirements, expected_conflicts): + # First reverse conflicts to get them in the right order + reversed_expected_conflicts = {k: reverse(v) for (k, v) + in expected_conflicts.iteritems()} + + self.run_conflict_test(reverse(requirements), + reversed_expected_conflicts) + + def run_no_conflict_test(self, requirements, expected_specs): + print pip_find_deps.resolve_specs(requirements) + + names = set([r.project_name for r in requirements]) + + for name in names: + _exps = set([(op, parse_version(v)) for (op, v) + in expected_specs[name]]) + + _specs = pip_find_deps.resolve_specs(requirements)[name] + + self.assertEqual(_specs, _exps) + + def test_eqs_eqs(self): + requirements = list(parse_requirements(['a == 0.1', 'a == 0.2'])) + expected_conflicts = {'a': [('==', '0.1'), ('==', '0.2')]} + + self.run_conflict_test(requirements, expected_conflicts) + self.run_conflict_test_reversed(requirements, expected_conflicts) + + def test_eqs_nt_eq(self): + # == x conflicts with != x + requirements = list(parse_requirements(['a == 0.1', 'a != 0.1'])) + expected_conflicts = {'a': [('==', 
'0.1'), ('!=', '0.1')]} + + self.run_conflict_test(requirements, expected_conflicts) + self.run_conflict_test_reversed(requirements, expected_conflicts) + + def test_eqs_lt(self): + # == x conflicts with < y if x >= y + requirements = list(parse_requirements(['a == 0.2', 'a < 0.1'])) + + expected_conflicts = {'a': [('==', '0.2'), ('<', '0.1')]} + + self.run_conflict_test(requirements, expected_conflicts) + self.run_conflict_test_reversed(requirements, expected_conflicts) + + requirements = list(parse_requirements(['a == 0.1', 'a < 0.1'])) + + expected_conflicts = {'a': [('==', '0.1'), ('<', '0.1')]} + + self.run_conflict_test(requirements, expected_conflicts) + self.run_conflict_test_reversed(requirements, expected_conflicts) + + def test_eqs_gt(self): + # == x conflicts with > y if x <= y + requirements = list(parse_requirements(['a == 0.1', 'a > 0.1'])) + + expected_conflicts = {'a': [('==', '0.1'), ('>', '0.1')]} + + self.run_conflict_test(requirements, expected_conflicts) + self.run_conflict_test_reversed(requirements, expected_conflicts) + + requirements = list(parse_requirements(['a == 0.1', 'a > 0.2'])) + + expected_conflicts = {'a': [('==', '0.1'), ('>', '0.2')]} + + self.run_conflict_test(requirements, expected_conflicts) + self.run_conflict_test_reversed(requirements, expected_conflicts) + + def test_eqs_lte(self): + # == x conflicts with <= y if x > y + requirements = list(parse_requirements(['a == 0.2', 'a <= 0.1'])) + + expected_conflicts = {'a': [('==', '0.2'), ('<=', '0.1')]} + + self.run_conflict_test(requirements, expected_conflicts) + self.run_conflict_test_reversed(requirements, expected_conflicts) + + requirements = list(parse_requirements(['a == 0.1', 'a <= 0.1'])) # no conflict + expected_specs = {'a': set([('==', '0.1'), ('<=', '0.1')])} + + self.run_no_conflict_test(requirements, expected_specs) + self.run_no_conflict_test(reverse(requirements), expected_specs) + + def test_eq_gte(self): + # == x conflicts with >= y if x < y + requirements = 
list(parse_requirements(['a == 0.1', 'a >= 0.2'])) + + expected_conflicts = {'a': [('==', '0.1'), ('>=', '0.2')]} + + self.run_conflict_test(requirements, expected_conflicts) + self.run_conflict_test_reversed(requirements, expected_conflicts) + + requirements = list(parse_requirements(['a == 0.1', 'a >= 0.1'])) + expected_specs = {'a': set([('==', '0.1'), ('>=', '0.1')])} + + self.run_no_conflict_test(requirements, expected_specs) + self.run_no_conflict_test(reverse(requirements), expected_specs) + + def test_lt_lt(self): + # < x < y never conflicts + requirements = list(parse_requirements(['a < 0.1', 'a < 0.1'])) + expected_specs = {'a': set([('<', '0.1')])} + + self.run_no_conflict_test(requirements, expected_specs) + self.run_no_conflict_test(reverse(requirements), expected_specs) + + requirements = list(parse_requirements(['a < 0.1', 'a < 0.2'])) + expected_specs = {'a': set([('<', '0.1'), ('<', '0.2')])} + + self.run_no_conflict_test(requirements, expected_specs) + self.run_no_conflict_test(reverse(requirements), expected_specs) + + def test_lt_gt(self): + # < x conflicts with > y if x <= y + requirements = list(parse_requirements(['a < 0.1', 'a > 0.1'])) + + expected_conflicts = {'a': [('<', '0.1'), ('>', '0.1')]} + + self.run_conflict_test(requirements, expected_conflicts) + self.run_conflict_test_reversed(requirements, expected_conflicts) + + requirements = list(parse_requirements(['a < 0.1', 'a > 0.2'])) + + expected_conflicts = {'a': [('<', '0.1'), ('>', '0.2')]} + + self.run_conflict_test(requirements, expected_conflicts) + self.run_conflict_test_reversed(requirements, expected_conflicts) + + def test_lt_lte(self): + # < x <= y never conflicts + requirements = list(parse_requirements(['a < 0.1', 'a <= 0.1'])) + expected_specs = {'a': set([('<', '0.1'), ('<=', '0.1')])} + + self.run_no_conflict_test(requirements, expected_specs) + self.run_no_conflict_test(reverse(requirements), expected_specs) + + requirements = list(parse_requirements(['a < 0.1', 'a <= 
0.2'])) + expected_specs = {'a': set([('<', '0.1'), ('<=', '0.2')])} + + self.run_no_conflict_test(requirements, expected_specs) + self.run_no_conflict_test(reverse(requirements), expected_specs) + + def test_lt_gte(self): + # < x conflicts with >= y if x <= y + requirements = list(parse_requirements(['a < 0.1', 'a >= 0.1'])) + + expected_conflicts = {'a': [('<', '0.1'), ('>=', '0.1')]} + + self.run_conflict_test(requirements, expected_conflicts) + self.run_conflict_test_reversed(requirements, expected_conflicts) + + requirements = list(parse_requirements(['a < 0.1', 'a >= 0.2'])) + + expected_conflicts = {'a': [('<', '0.1'), ('>=', '0.2')]} + + self.run_conflict_test(requirements, expected_conflicts) + self.run_conflict_test_reversed(requirements, expected_conflicts) + + def test_gt_gt(self): + # > x > y never conflicts + requirements = list(parse_requirements(['a > 0.1', 'a > 0.1'])) + expected_specs = {'a': set([('>', '0.1')])} + + self.run_no_conflict_test(requirements, expected_specs) + self.run_no_conflict_test(reverse(requirements), expected_specs) + + requirements = list(parse_requirements(['a > 0.1', 'a > 0.2'])) + expected_specs = {'a': set([('>', '0.1'), ('>', '0.2')])} + + self.run_no_conflict_test(requirements, expected_specs) + self.run_no_conflict_test(reverse(requirements), expected_specs) + + def test_gt_lte(self): + # > x conflicts with <= y if x >= y + requirements = list(parse_requirements(['a > 0.1', 'a <= 0.1'])) + + expected_conflicts = {'a': [('>', '0.1'), ('<=', '0.1')]} + + self.run_conflict_test(requirements, expected_conflicts) + self.run_conflict_test_reversed(requirements, expected_conflicts) + + requirements = list(parse_requirements(['a > 0.2', 'a <= 0.1'])) + + expected_conflicts = {'a': [('>', '0.2'), ('<=', '0.1')]} + + self.run_conflict_test(requirements, expected_conflicts) + self.run_conflict_test_reversed(requirements, expected_conflicts) + + def test_gt_gte(self): + # > x >= y never conflicts + requirements = 
list(parse_requirements(['a > 0.1', 'a >= 0.1'])) + expected_specs = {'a': set([('>', '0.1'), ('>=', '0.1')])} + + self.run_no_conflict_test(requirements, expected_specs) + self.run_no_conflict_test(reverse(requirements), expected_specs) + + requirements = list(parse_requirements(['a > 0.1', 'a >= 0.2'])) + expected_specs = {'a': set([('>', '0.1'), ('>=', '0.2')])} + + self.run_no_conflict_test(requirements, expected_specs) + self.run_no_conflict_test(reverse(requirements), expected_specs) + + def test_lte_lte(self): + # <= x <= y never conflicts + requirements = list(parse_requirements(['a <= 0.1', 'a <= 0.1'])) + expected_specs = {'a': set([('<=', '0.1')])} + + self.run_no_conflict_test(requirements, expected_specs) + self.run_no_conflict_test(reverse(requirements), expected_specs) + + requirements = list(parse_requirements(['a <= 0.1', 'a <= 0.2'])) + expected_specs = {'a': set([('<=', '0.1'), ('<=', '0.2')])} + + self.run_no_conflict_test(requirements, expected_specs) + self.run_no_conflict_test(reverse(requirements), expected_specs) + + def test_lte_gte(self): + # <= x conflicts with >= y if x < y + # note that if x == y, then the two specs don't add any constraint + requirements = list(parse_requirements(['a <= 0.1', 'a >= 0.1'])) + + expected_specs= {'a': set([('<=', '0.1'), ('>=', '0.1')])} + + self.run_no_conflict_test(requirements, expected_specs) + self.run_no_conflict_test(reverse(requirements), expected_specs) + + requirements = list(parse_requirements(['a <= 0.1', 'a >= 0.2'])) + + expected_conflicts = {'a': [('<=', '0.1'), ('>=', '0.2')]} + + self.run_conflict_test(requirements, expected_conflicts) + self.run_conflict_test_reversed(requirements, expected_conflicts) + + def test_gte_gte(self): + # >= x >= y never conflicts + requirements = list(parse_requirements(['a >= 0.1', 'a >= 0.1'])) + expected_specs = {'a': set([('>=', '0.1')])} + + self.run_no_conflict_test(requirements, expected_specs) + self.run_no_conflict_test(reverse(requirements), 
expected_specs) + + requirements = list(parse_requirements(['a >= 0.1', 'a >= 0.2'])) + expected_specs = {'a': set([('>=', '0.1'), ('>=', '0.2')])} + + self.run_no_conflict_test(requirements, expected_specs) + self.run_no_conflict_test(reverse(requirements), expected_specs) + + def test_ne(self): + # != can only conflict with == (which is tested above) + for s in ['<', '>', '<=', '>=']: + requirements = list(parse_requirements(['a != 0.1', 'a %s 0.1' % s])) + expected_specs = {'a': set([('!=', '0.1'), ('%s' % s, '0.1')])} + + self.run_no_conflict_test(requirements, expected_specs) + self.run_no_conflict_test(reverse(requirements), expected_specs) + + requirements = list(parse_requirements(['a != 0.1', 'a %s 0.2' % s])) + expected_specs = {'a': set([('!=', '0.1'), ('%s' % s, '0.2')])} + + #self.run_no_conflict_test(requirements, expected_specs) + #self.run_no_conflict_test(reverse(requirements), expected_specs) + + def test_unmatched(self): + # Run all permutations, fail if we get an UnmatchedException + # or something else we weren't expecting + comparitors = ['==', '!=', '<', '>', '<=', '>='] + vs = [('0.1', '0.1'), ('0.1', '0.2'), + ('%s' % random.randint(0, 100), '%s' % random.randint(0, 100))] + + for (vx, vy) in vs: + for cmpx in comparitors: + for cmpy in comparitors: + requirements = parse_requirements(['a %s %s' % (cmpx, vx), + 'a %s %s' % (cmpy, vy)]) + try: + pip_find_deps.resolve_specs(requirements) + except pip_find_deps.ConflictError: + pass + except pip_find_deps.UnmatchedException as e: + self.fail('Got UnmatchedException: %s' % e) + except Exception as e: + self.fail('Got some other unexpected Exception: %s' % e) + + def test_cause_unmatched(self): + requirements_specs = list(parse_requirements(['a == 0.1', 'a == 0.1'])) + + # replace our parsed specs with invalid specs + # specifically, specs with invalid operators + # + # note, one spec won't do, we're validating the specs logically + # not syntactically; we assume the specs themselves have been 
parsed + # by pkg_resources which will do the validation for us. + # + # so we need two specs to force a check for a conflict, + # an UnmatchedError should occur if neither of the specs + # contain an operator recognised by the conflict detector + # e.g. '===', which is undefined in a spec + requirements_specs[0].specs = [('===', '0.1')] + requirements_specs[1].specs = [('===', '0.1')] + + with self.assertRaises(pip_find_deps.UnmatchedError): + specs = pip_find_deps.resolve_specs(requirements_specs) + + def test_distinct_requirements_no_conflict(self): + requirements = list(parse_requirements(['a == 0.1', 'b == 0.1'])) + + specs = pip_find_deps.resolve_specs(requirements) + + expected_specs = {'a': set([('==', parse_version('0.1'))]), + 'b': set([('==', parse_version('0.1'))])} + + self.assertEqual(specs, expected_specs) + + +if __name__ == '__main__': + suite = unittest.TestLoader().loadTestsFromTestCase(ConflictDetectionTests) + #suite = unittest.TestSuite() + #suite.addTest(ConflictDetectionTests('test_ne')) + unittest.TextTestRunner(verbosity=2).run(suite) diff --git a/baserockimport/exts/pip_lorry_tests.py b/baserockimport/exts/pip_lorry_tests.py new file mode 100755 index 0000000..56ec6fc --- /dev/null +++ b/baserockimport/exts/pip_lorry_tests.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python +# Copyright (C) 2014 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + +# import pip_lorry +# Can't do this yet +# +# So here's a hack for now +import imp +pip_lorry = imp.load_source('pip_lorry', 'pip.to_lorry') + +import json + +import unittest + +class Tests(unittest.TestCase): + + def test_make_tarball_lorry(self): + gzip, bzip, lzma = 'gzip', 'bzip2', 'lzma' + + valid_extensions = {'tar.gz': gzip, 'tgz': gzip, 'tar.Z': gzip, + 'tar.bz2': bzip, 'tbz2': bzip, + 'tar.lzma': lzma, 'tar.xz': lzma, + 'tlz': lzma, 'txz': lzma} + + def make_url(extension): + return 'http://foobar.baz/%s' % extension + + def get_tarball_lorry_url(name, lorry_json): + return json.loads(lorry_json)[name + '-tarball']['url'] + + fake_package_name = 'name' + urls = [make_url(extension) for extension in valid_extensions] + + for url in urls: + lorry_json = pip_lorry.make_tarball_lorry('name', url) + self.assertEqual(get_tarball_lorry_url(fake_package_name, + lorry_json), url) + + url = 'http://foobar/baz.tar' + lorry_json = pip_lorry.make_tarball_lorry('name', url) + self.assertEqual(get_tarball_lorry_url(fake_package_name, + lorry_json), url) + +if __name__ == '__main__': + suite = unittest.TestLoader().loadTestsFromTestCase(Tests) + unittest.TextTestRunner(verbosity=2).run(suite) diff --git a/baserockimport/mainloop.py b/baserockimport/mainloop.py index b400695..057ab98 100644 --- a/baserockimport/mainloop.py +++ b/baserockimport/mainloop.py @@ -58,6 +58,7 @@ def run_extension(filename, args): output.append(line) def report_extension_stderr(line): + logging.debug('Received "%s" on stderr' % line) errors.append(line) def report_extension_logger(line): @@ -115,7 +116,7 @@ class ImportLoop(object): self.importers = {} - def enable_importer(self, kind, extra_args=[]): + def enable_importer(self, kind, extra_args=[], **kwargs): '''Enable an 
importer extension in this ImportLoop instance. At least one importer extension must be enabled for the loop to do @@ -129,7 +130,8 @@ class ImportLoop(object): ''' assert kind not in self.importers self.importers[kind] = { - 'extra_args': extra_args + 'extra_args': extra_args, + 'kwargs': kwargs } def run(self): @@ -589,18 +591,22 @@ class ImportLoop(object): 'ref': m.ref, 'unpetrify-ref': m.named_ref, 'morph': m.filename, - 'build-depends': build_depends, + 'build-depends': build_depends } chunk_entries.append(entry) + kwargs = self.importers[kind]['kwargs'] + + stratum_build_depends = ( + [{'morph': stratum} for stratum in kwargs['strata']] + if 'strata' in kwargs else []) + stratum_name = goal_name stratum = { 'name': stratum_name, 'kind': 'stratum', 'description': 'Autogenerated by Baserock import tool', - 'build-depends': [ - {'morph': 'strata/ruby.morph'} - ], + 'build-depends': stratum_build_depends, 'chunks': chunk_entries, } -- cgit v1.2.1