From 08c531c27d0035897f0212cb3d7aed54d6f804ea Mon Sep 17 00:00:00 2001 From: Richard Ipsum Date: Wed, 19 Aug 2015 15:30:55 +0000 Subject: Add cpan.to_lorry extension Change-Id: I3e8077d1e91a28ac0ed30cb0e8102622c866a8e0 --- baserockimport/exts/cpan.to_lorry | 194 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 194 insertions(+) create mode 100755 baserockimport/exts/cpan.to_lorry diff --git a/baserockimport/exts/cpan.to_lorry b/baserockimport/exts/cpan.to_lorry new file mode 100755 index 0000000..0229bed --- /dev/null +++ b/baserockimport/exts/cpan.to_lorry @@ -0,0 +1,194 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Create a Baserock .lorry file for a given Perl distribution +# +# Copyright © 2015 Codethink Limited +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + +from __future__ import print_function + +import json +import logging +import os +import sys +import requests +import urlparse +import re +import subprocess + +from importer_base import ImportException, ImportExtension, WebServiceClient +import utils + + +class GenerateLorryException(ImportException): + def __init__(self, msg): + s = "Couldn't generate lorry: %s" % msg + super(GenerateLorryException, self).__init__(s) + + +METACPAN_URL = 'http://api.metacpan.org/v0' + +NO_RELEASE_ERRMSG = ("Couldn't find a release of distribution `%s' " + "with version `%s'") + +NO_DOWNLOAD_URL_ERRMSG = ("Couldn't get download url for distribution `%s' " + "with version `%s': " + "server returned unexpected response") + + +class CPANLorryGenerator(ImportExtension): + + def __init__(self): + super(CPANLorryGenerator, self).__init__() + # FIXME: post requests don't seem to work with requests_cache + # fails with + # "TypeError: request() got an unexpected keyword argument 'json'" + #self.apiclient = WebServiceClient('cpan_api_cache') + + def search_for_url_match(self, dist_name, dist_version): + ''' If we don't get a hit we can try an alternative query, + get all download_urls and hope they follow a convention + that can be extracted with CPAN::DistnameInfo ''' + + q = {"query": { "filtered":{ + "query":{"match_all":{}}, + "filter":{ + "term":{"release.distribution": dist_name} + } + }}, + "fields": ["download_url"]} + + def extensions_dir(): + return os.path.dirname(__file__) + + query_url = METACPAN_URL + '/release/_search' + r = requests.post(query_url, json=q) + r.raise_for_status() + + hits = r.json()['hits'] + logging.debug("Got %s hits", hits['total']) + if hits['total'] == 0: + return None + + for hit in hits['hits']: + logging.debug('hit: %s', hit) + download_url = hit['fields']['download_url'] + r = re.match('https?://cpan.metacpan.org/(.*)', download_url) + + if not r: + return None + pathname = r.groups(0)[0] + + exepath = os.path.join(extensions_dir(), + 'pathname2distinfo.pl') + dist_info = json.loads(subprocess.check_output([exepath, + pathname])) + + logging.debug("Dist info: %s", dist_info) + + if dist_info.get('version') == dist_version: + return download_url + + return None + + def get_tarball_url(self, dist_name, dist_version): + if dist_version is None: + r = requests.get(METACPAN_URL + '/release/' + dist_name) + r.raise_for_status() + + return r.json().get('download_url') + + q = {"query": { "filtered":{ + "query":{"match_all":{}}, + "filter":{"and":[ + {"term":{"release.distribution": dist_name}}, + {"term":{"release.version": dist_version}} + ]} + }}, + "fields": ["download_url"]} + + # TODO: use apiclient + query_url = METACPAN_URL + '/release/_search' + r = requests.post(query_url, json=q) + r.raise_for_status() + + logging.debug('r.json(): %s', r.json()) + + hits = r.json()['hits']['total'] + if hits == 0: + download_url = self.search_for_url_match(dist_name, dist_version) + if download_url is None: + raise GenerateLorryException(NO_RELEASE_ERRMSG + % (dist_name, dist_version)) + else: + try: + fields = r.json()['hits']['hits'][0]['fields'] + download_url = fields['download_url'] + except KeyError: + raise GenerateLorryException(NO_DOWNLOAD_URL_ERRMSG + % (dist_name, dist_version)) + + return download_url + + def process_args(self, args): + if len(args) not in (1, 2): + raise ImportException('usage: %s NAME [VERSION]' % sys.argv[0]) + + dist_name = args[0] + dist_version = args[1] if len(args) == 2 else None + + logging.info('Generating tarball lorry') + + pathname = None + metadata_path = os.environ.get('IMPORT_METAPATH') + # metadata passed through IMPORT_METAPATH may already contain + # the distribution pathname, which we can use for lorrying, + # if this is the case then there's no need to query metacpan. + + if metadata_path: + logging.debug('Got metadata path: %s', metadata_path) + try: + with open(metadata_path) as f: + pathname = (json.load(f)['cpan'] + ['dist-meta'][dist_name]['pathname']) + logging.debug('got pathname: %s', pathname) + except (KeyError, ValueError, IOError) as e: + logging.debug('Following exception can be safely ignored') + logging.exception(e) + # we will fallback to querying cpan + # if we don't have the pathname in the metadata + pass + + if pathname: + logging.debug("Taking pathname `%s' from parent metadata", + pathname) + url = 'http://cpan.metacpan.org/authors/id/%s' % pathname + else: + logging.debug("Querying metacpan for lorry details for %s %s", + dist_name, dist_version) + try: + url = self.get_tarball_url(dist_name, dist_version) + except requests.exceptions.RequestException as e: + raise GenerateLorryException('got %s while fetching %s' + % (e, e.request.url)) + + lorry = utils.str_tarball_lorry('cpan', 'cpan', dist_name, url) + print(lorry) + + +if __name__ == '__main__': + CPANLorryGenerator().run() -- cgit v1.2.1