summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Richard Ipsum <richard.ipsum@codethink.co.uk> 2015-08-19 15:30:55 +0000
committer: Richard Ipsum <richard.ipsum@codethink.co.uk> 2015-08-25 10:45:11 +0000
commit: 08c531c27d0035897f0212cb3d7aed54d6f804ea (patch)
tree: ac88740ca2fb3e3dd8de6b15ff4253d23ab0918b
parent: 79cfccb7d8165299fbdffdc6663cea8f259c859b (diff)
download: import-08c531c27d0035897f0212cb3d7aed54d6f804ea.tar.gz
Add cpan.to_lorry extension
Change-Id: I3e8077d1e91a28ac0ed30cb0e8102622c866a8e0
-rwxr-xr-x baserockimport/exts/cpan.to_lorry 194
1 files changed, 194 insertions, 0 deletions
diff --git a/baserockimport/exts/cpan.to_lorry b/baserockimport/exts/cpan.to_lorry
new file mode 100755
index 0000000..0229bed
--- /dev/null
+++ b/baserockimport/exts/cpan.to_lorry
@@ -0,0 +1,194 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+# Create a Baserock .lorry file for a given Perl distribution
+#
+# Copyright © 2015 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+from __future__ import print_function
+
+import json
+import logging
+import os
+import sys
+import requests
+import urlparse
+import re
+import subprocess
+
+from importer_base import ImportException, ImportExtension, WebServiceClient
+import utils
+
+
+class GenerateLorryException(ImportException):
+    '''Error raised when a lorry cannot be generated.
+
+    The message given by the caller is prefixed with
+    "Couldn't generate lorry: " before being passed to ImportException.
+    '''
+
+    def __init__(self, msg):
+        super(GenerateLorryException, self).__init__(
+            "Couldn't generate lorry: %s" % msg)
+
+
+# Base URL that every metacpan API request in this file is built from.
+METACPAN_URL = 'http://api.metacpan.org/v0'
+
+# Error message templates; both are formatted with a
+# (distribution name, distribution version) pair.
+NO_RELEASE_ERRMSG = (
+    "Couldn't find a release of distribution "
+    "`%s' with version `%s'"
+)
+
+NO_DOWNLOAD_URL_ERRMSG = (
+    "Couldn't get download url for distribution "
+    "`%s' with version `%s': server returned unexpected response"
+)
+
+
+class CPANLorryGenerator(ImportExtension):
+    '''Print a tarball .lorry for a Perl distribution hosted on CPAN.
+
+    Usage is ``NAME [VERSION]``. The tarball URL is taken from the
+    metadata file named by the IMPORT_METAPATH environment variable when
+    that file already knows the distribution's pathname; otherwise it is
+    looked up through the metacpan API at METACPAN_URL.
+    '''
+
+    def __init__(self):
+        super(CPANLorryGenerator, self).__init__()
+        # FIXME: post requests don't seem to work with requests_cache
+        # fails with
+        # "TypeError: request() got an unexpected keyword argument 'json'"
+        #self.apiclient = WebServiceClient('cpan_api_cache')
+
+    def _search_releases(self, release_filter):
+        '''POST a filtered release search and return the response's 'hits'.
+
+        release_filter is placed verbatim under the query's "filter" key;
+        only the "download_url" field of each release is requested.
+
+        Raises requests.exceptions.RequestException on transport errors
+        or a non-success HTTP status.
+        '''
+        q = {
+            "query": {"filtered": {
+                "query": {"match_all": {}},
+                "filter": release_filter,
+            }},
+            "fields": ["download_url"],
+        }
+
+        # TODO: use apiclient
+        response = requests.post(METACPAN_URL + '/release/_search', json=q)
+        response.raise_for_status()
+
+        body = response.json()
+        logging.debug('r.json(): %s', body)
+        return body['hits']
+
+    def search_for_url_match(self, dist_name, dist_version):
+        ''' If we don't get a hit we can try an alternative query,
+            get all download_urls and hope they follow a convention
+            that can be extracted with CPAN::DistnameInfo
+
+        Every release of dist_name returned by the search is examined:
+        the path part of its download URL is passed to the
+        pathname2distinfo.pl helper that sits next to this file, and the
+        first URL whose reported 'version' equals dist_version is
+        returned. Returns None when nothing matches.
+        '''
+
+        hits = self._search_releases(
+            {"term": {"release.distribution": dist_name}})
+        logging.debug("Got %s hits", hits['total'])
+        if hits['total'] == 0:
+            return None
+
+        # The helper's location does not change between hits, so work
+        # it out once rather than on every iteration.
+        exepath = os.path.join(os.path.dirname(__file__),
+                               'pathname2distinfo.pl')
+
+        for hit in hits['hits']:
+            logging.debug('hit: %s', hit)
+            download_url = hit['fields']['download_url']
+            match = re.match(r'https?://cpan\.metacpan\.org/(.*)',
+                             download_url)
+
+            if not match:
+                # A URL we can't parse says nothing about the remaining
+                # hits, so keep looking instead of giving up.
+                continue
+            pathname = match.group(1)
+
+            dist_info = json.loads(
+                subprocess.check_output([exepath, pathname]))
+            logging.debug("Dist info: %s", dist_info)
+
+            if dist_info.get('version') == dist_version:
+                return download_url
+
+        return None
+
+    def get_tarball_url(self, dist_name, dist_version):
+        '''Return the download URL of dist_name's tarball.
+
+        With dist_version None the URL is read from metacpan's
+        '/release/<dist_name>' document. Otherwise releases are searched
+        by distribution name and version, falling back to
+        search_for_url_match() when that search has no hits.
+
+        Raises GenerateLorryException when no release or no download URL
+        can be found, and requests.exceptions.RequestException on HTTP
+        failures.
+        '''
+        if dist_version is None:
+            r = requests.get(METACPAN_URL + '/release/' + dist_name)
+            r.raise_for_status()
+
+            download_url = r.json().get('download_url')
+            if download_url is None:
+                # Fail here rather than emit a lorry without a URL.
+                raise GenerateLorryException(
+                    "Couldn't get download url for distribution `%s': "
+                    "server returned unexpected response" % dist_name)
+            return download_url
+
+        hits = self._search_releases({"and": [
+            {"term": {"release.distribution": dist_name}},
+            {"term": {"release.version": dist_version}},
+        ]})
+
+        if hits['total'] == 0:
+            download_url = self.search_for_url_match(dist_name, dist_version)
+            if download_url is None:
+                raise GenerateLorryException(NO_RELEASE_ERRMSG
+                                             % (dist_name, dist_version))
+            return download_url
+
+        try:
+            return hits['hits'][0]['fields']['download_url']
+        except (KeyError, IndexError):
+            # IndexError: a non-zero total with an empty hit list is just
+            # as unexpected as a hit without the requested field.
+            raise GenerateLorryException(NO_DOWNLOAD_URL_ERRMSG
+                                         % (dist_name, dist_version))
+
+    def _pathname_from_metadata(self, dist_name):
+        '''Return dist_name's pathname from the IMPORT_METAPATH file.
+
+        Returns None when the variable is unset or empty, or when the
+        file cannot be read, is not valid JSON, or has no
+        ['cpan']['dist-meta'][dist_name]['pathname'] entry.
+        '''
+        metadata_path = os.environ.get('IMPORT_METAPATH')
+        if not metadata_path:
+            return None
+
+        logging.debug('Got metadata path: %s', metadata_path)
+        try:
+            with open(metadata_path) as f:
+                pathname = (json.load(f)['cpan']
+                            ['dist-meta'][dist_name]['pathname'])
+        except (KeyError, TypeError, ValueError, IOError):
+            # Missing metadata is expected; the caller falls back to
+            # querying metacpan, so this is not reported as an error.
+            logging.debug('Following exception can be safely ignored',
+                          exc_info=True)
+            return None
+
+        logging.debug('got pathname: %s', pathname)
+        return pathname
+
+    def process_args(self, args):
+        '''Print a tarball lorry for the distribution named in args.
+
+        args is [NAME] or [NAME, VERSION].
+
+        Raises ImportException for a wrong number of arguments and
+        GenerateLorryException when the tarball URL cannot be found.
+        '''
+        if len(args) not in (1, 2):
+            raise ImportException('usage: %s NAME [VERSION]' % sys.argv[0])
+
+        dist_name = args[0]
+        dist_version = args[1] if len(args) == 2 else None
+
+        logging.info('Generating tarball lorry')
+
+        # metadata passed through IMPORT_METAPATH may already contain
+        # the distribution pathname, which we can use for lorrying,
+        # if this is the case then there's no need to query metacpan.
+        pathname = self._pathname_from_metadata(dist_name)
+
+        if pathname:
+            logging.debug("Taking pathname `%s' from parent metadata",
+                          pathname)
+            url = 'http://cpan.metacpan.org/authors/id/%s' % pathname
+        else:
+            logging.debug("Querying metacpan for lorry details for %s %s",
+                          dist_name, dist_version)
+            try:
+                url = self.get_tarball_url(dist_name, dist_version)
+            except requests.exceptions.RequestException as e:
+                # e.request can be None, so don't dereference it blindly.
+                failed_url = getattr(getattr(e, 'request', None),
+                                     'url', METACPAN_URL)
+                raise GenerateLorryException('got %s while fetching %s'
+                                             % (e, failed_url))
+
+        lorry = utils.str_tarball_lorry('cpan', 'cpan', dist_name, url)
+        print(lorry)
+
+
+# Script entry point: build the extension and hand control to run().
+if __name__ == '__main__':
+    generator = CPANLorryGenerator()
+    generator.run()