#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Create a Baserock .lorry file for a given Perl distribution
#
# Copyright © 2015 Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

from __future__ import print_function

import json
import logging
import os
import sys
import requests
import urlparse
import re
import subprocess

from importer_base import ImportException, ImportExtension, WebServiceClient
import utils


class GenerateLorryException(ImportException):
    '''Raised when a lorry cannot be generated for a distribution.'''

    def __init__(self, msg):
        s = "Couldn't generate lorry: %s" % msg
        super(GenerateLorryException, self).__init__(s)


METACPAN_URL = 'http://api.metacpan.org/v0'

NO_RELEASE_ERRMSG = ("Couldn't find a release of distribution `%s' "
                     "with version `%s'")

NO_DOWNLOAD_URL_ERRMSG = ("Couldn't get download url for distribution `%s' "
                          "with version `%s': "
                          "server returned unexpected response")

# Matches download urls served from the CPAN mirror; group 1 is the path
# below the mirror root, which is what pathname2distinfo.pl is given.
_CPAN_DOWNLOAD_URL_RE = re.compile(r'https?://cpan\.metacpan\.org/(.*)')


class CPANLorryGenerator(ImportExtension):
    '''Import extension that prints a tarball lorry for a CPAN distribution.'''

    def __init__(self):
        super(CPANLorryGenerator, self).__init__()

        # FIXME: post requests don't seem to work with requests_cache
        # fails with
        # "TypeError: request() got an unexpected keyword argument 'json'"
        #self.apiclient = WebServiceClient('cpan_api_cache')

    def _release_search(self, release_filter):
        '''POST a release search to metacpan and return the decoded reply.

        `release_filter` is placed verbatim in the "filter" slot of a
        filtered match_all query; only the download_url field is requested.

        Raises requests.exceptions.RequestException (including HTTPError
        for a non-success status) if the request fails.

        '''
        # TODO: use apiclient
        q = {"query": {
                "filtered": {
                    "query": {"match_all": {}},
                    "filter": release_filter
                }},
             "fields": ["download_url"]}

        r = requests.post(METACPAN_URL + '/release/_search', json=q)
        r.raise_for_status()
        return r.json()

    def search_for_url_match(self, dist_name, dist_version):
        '''Find a download url for a version by inspecting every release.

        If we don't get a hit we can try an alternative query,
        get all download_urls and hope they follow a convention
        that can be extracted with CPAN::DistnameInfo

        Each download url is handed to the pathname2distinfo.pl helper
        located next to this file, and the first url whose reported
        version equals `dist_version` is returned.  Returns None if no
        release matches.

        '''
        response = self._release_search(
            {"term": {"release.distribution": dist_name}})

        hits = response['hits']
        logging.debug("Got %s hits", hits['total'])
        if hits['total'] == 0:
            return None

        # The helper script's location does not depend on the hit.
        exepath = os.path.join(os.path.dirname(__file__),
                               'pathname2distinfo.pl')

        for hit in hits['hits']:
            logging.debug('hit: %s', hit)
            download_url = hit['fields']['download_url']

            m = _CPAN_DOWNLOAD_URL_RE.match(download_url)
            if not m:
                # This url isn't on the CPAN mirror so we can't derive its
                # version, but another release may still match: keep going
                # rather than abandoning the whole search.
                continue

            pathname = m.group(1)
            dist_info = json.loads(
                subprocess.check_output([exepath, pathname]))
            logging.debug("Dist info: %s", dist_info)

            if dist_info.get('version') == dist_version:
                return download_url

        return None

    def get_tarball_url(self, dist_name, dist_version):
        '''Return the download url of the tarball for a distribution.

        When `dist_version` is None the release metacpan returns for
        `dist_name` is used; otherwise the release with exactly that
        version is searched for, falling back to search_for_url_match()
        when the direct search finds nothing.

        Raises GenerateLorryException if no release or no download url
        can be found, and requests.exceptions.RequestException if a
        request to metacpan fails.

        '''
        if dist_version is None:
            r = requests.get(METACPAN_URL + '/release/' + dist_name)
            r.raise_for_status()
            download_url = r.json().get('download_url')
            if download_url is None:
                # Without this the caller would emit a lorry with no url.
                raise GenerateLorryException(NO_DOWNLOAD_URL_ERRMSG
                                             % (dist_name, dist_version))
            return download_url

        response = self._release_search(
            {"and": [
                {"term": {"release.distribution": dist_name}},
                {"term": {"release.version": dist_version}}
            ]})
        logging.debug('r.json(): %s', response)

        if response['hits']['total'] == 0:
            download_url = self.search_for_url_match(dist_name, dist_version)
            if download_url is None:
                raise GenerateLorryException(NO_RELEASE_ERRMSG
                                             % (dist_name, dist_version))
            return download_url

        try:
            return response['hits']['hits'][0]['fields']['download_url']
        except (KeyError, IndexError):
            # IndexError: a non-zero total with an empty hit list is just
            # as much an unexpected response as a missing key.
            raise GenerateLorryException(NO_DOWNLOAD_URL_ERRMSG
                                         % (dist_name, dist_version))

    def process_args(self, args):
        '''Print a tarball lorry for the distribution named in `args`.

        `args` is [NAME] or [NAME, VERSION].  If the IMPORT_METAPATH
        environment variable names a readable JSON file containing
        ['cpan']['dist-meta'][NAME]['pathname'], the url is built from
        that pathname; otherwise metacpan is queried.

        Raises ImportException on bad usage and GenerateLorryException
        if the tarball url can't be determined.

        '''
        if len(args) not in (1, 2):
            raise ImportException('usage: %s NAME [VERSION]' % sys.argv[0])

        dist_name = args[0]
        dist_version = args[1] if len(args) == 2 else None
        logging.info('Generating tarball lorry')

        pathname = None
        metadata_path = os.environ.get('IMPORT_METAPATH')

        # metadata passed through IMPORT_METAPATH may already contain
        # the distribution pathname, which we can use for lorrying,
        # if this is the case then there's no need to query metacpan.
        if metadata_path:
            logging.debug('Got metadata path: %s', metadata_path)

            try:
                with open(metadata_path) as f:
                    pathname = (json.load(f)['cpan']
                                ['dist-meta'][dist_name]['pathname'])
                    logging.debug('got pathname: %s', pathname)
            except (KeyError, TypeError, ValueError, IOError) as e:
                # TypeError covers metadata whose JSON isn't shaped as
                # nested objects.  In every case we fall back to querying
                # cpan, as we don't have the pathname in the metadata.
                logging.debug('Following exception can be safely ignored')
                logging.exception(e)

        if pathname:
            logging.debug("Taking pathname `%s' from parent metadata",
                          pathname)
            url = 'http://cpan.metacpan.org/authors/id/%s' % pathname
        else:
            logging.debug("Querying metacpan for lorry details for %s %s",
                          dist_name, dist_version)

            try:
                url = self.get_tarball_url(dist_name, dist_version)
            except requests.exceptions.RequestException as e:
                # e.request may be None, so don't dereference it blindly
                # and hide the real failure behind an AttributeError.
                request = getattr(e, 'request', None)
                request_url = getattr(request, 'url', None) or '<unknown url>'
                raise GenerateLorryException('got %s while fetching %s'
                                             % (e, request_url))

        lorry = utils.str_tarball_lorry('cpan', 'cpan', dist_name, url)
        print(lorry)


if __name__ == '__main__':
    CPANLorryGenerator().run()