summaryrefslogtreecommitdiff
path: root/baserockimport/exts/cpan.to_lorry
blob: 0229bedb674a80629c4f6fcdf64c6700e1a56161 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Create a Baserock .lorry file for a given Perl distribution
#
# Copyright © 2015  Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.


from __future__ import print_function

import json
import logging
import os
import sys
import requests
import urlparse
import re
import subprocess

from importer_base import ImportException, ImportExtension, WebServiceClient
import utils


class GenerateLorryException(ImportException):
    """Error raised when no lorry can be produced for a distribution.

    The message supplied by the caller is prefixed with
    "Couldn't generate lorry: " before being passed to ImportException.
    """

    def __init__(self, msg):
        super(GenerateLorryException, self).__init__(
            "Couldn't generate lorry: %s" % msg)


# Base URL of the MetaCPAN web API; endpoint paths are appended to it.
METACPAN_URL = 'http://api.metacpan.org/v0'

# Error message templates.  Each one is filled in with a
# (distribution name, distribution version) pair.
NO_RELEASE_ERRMSG = (
    "Couldn't find a release of distribution `%s' with version `%s'"
)

NO_DOWNLOAD_URL_ERRMSG = (
    "Couldn't get download url for distribution `%s' with version `%s': "
    "server returned unexpected response"
)


class CPANLorryGenerator(ImportExtension):
    """Import extension that prints a tarball lorry for a Perl distribution.

    Given a distribution NAME and an optional VERSION it works out the URL
    of the release tarball -- from the metadata file named by the
    IMPORT_METAPATH environment variable when that already records the
    distribution's pathname, otherwise by querying the MetaCPAN API -- and
    prints the lorry built by utils.str_tarball_lorry on stdout.
    """

    # Matches download URLs served from cpan.metacpan.org; group 1 is the
    # path below the host, which is what pathname2distinfo.pl is given.
    _DOWNLOAD_URL_RE = re.compile(r'https?://cpan\.metacpan\.org/(.*)')

    def __init__(self):
        super(CPANLorryGenerator, self).__init__()
        # FIXME: post requests don't seem to work with requests_cache
        # fails with
        # "TypeError: request() got an unexpected keyword argument 'json'"
        #self.apiclient = WebServiceClient('cpan_api_cache')

    def search_for_url_match(self, dist_name, dist_version):
        """Look for a release of `dist_name` by inspecting download URLs.

        This is the alternative query used when the exact name/version
        search gets no hit: fetch the download_url of the distribution's
        releases and hope they follow a convention that can be extracted
        with CPAN::DistnameInfo (via the pathname2distinfo.pl helper that
        lives next to this file).

        Returns the first download URL whose extracted version equals
        `dist_version`, or None when no URL qualifies.

        Errors from requests, from running the helper, or from decoding
        its JSON output are not handled here and propagate to the caller.
        """
        # NOTE(review): the query sets no "size", so only as many hits as
        # the server returns by default are inspected -- confirm that this
        # is enough for distributions with many releases.
        q = {
            "query": {
                "filtered": {
                    "query": {"match_all": {}},
                    "filter": {
                        "term": {"release.distribution": dist_name},
                    },
                },
            },
            "fields": ["download_url"],
        }

        query_url = METACPAN_URL + '/release/_search'
        response = requests.post(query_url, json=q)
        response.raise_for_status()

        hits = response.json()['hits']
        logging.debug("Got %s hits", hits['total'])
        if hits['total'] == 0:
            return None

        # Resolve the helper relative to this file's absolute location:
        # when the script is started by bare filename, dirname(__file__)
        # is '' and subprocess would search PATH for the helper instead.
        exepath = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                               'pathname2distinfo.pl')

        for hit in hits['hits']:
            logging.debug('hit: %s', hit)
            download_url = hit['fields']['download_url']

            match = self._DOWNLOAD_URL_RE.match(download_url)
            if match is None:
                # One URL hosted elsewhere must not abort the whole
                # search; the remaining hits may still match.
                logging.debug('Skipping unrecognised download url: %s',
                              download_url)
                continue
            pathname = match.group(1)

            dist_info = json.loads(
                subprocess.check_output([exepath, pathname]))
            logging.debug("Dist info: %s", dist_info)

            if dist_info.get('version') == dist_version:
                return download_url

        return None

    def get_tarball_url(self, dist_name, dist_version):
        """Return the tarball download URL for a distribution release.

        When `dist_version` is None the URL is taken from the
        download_url field of MetaCPAN's /release/<dist_name> document.
        Otherwise MetaCPAN is searched for a release with exactly that
        distribution name and version, falling back to
        search_for_url_match() when the search has no hits.

        Raises GenerateLorryException when no matching release is found
        or when the response does not contain a download URL.  Exceptions
        raised by requests propagate to the caller.
        """
        if dist_version is None:
            response = requests.get(METACPAN_URL + '/release/' + dist_name)
            response.raise_for_status()

            download_url = response.json().get('download_url')
            if not download_url:
                # Fail here rather than hand a missing url to the lorry.
                raise GenerateLorryException(
                    "Couldn't get download url for distribution `%s': "
                    "server returned unexpected response" % dist_name)
            return download_url

        q = {
            "query": {
                "filtered": {
                    "query": {"match_all": {}},
                    "filter": {
                        "and": [
                            {"term": {"release.distribution": dist_name}},
                            {"term": {"release.version": dist_version}},
                        ],
                    },
                },
            },
            "fields": ["download_url"],
        }

        # TODO: use apiclient
        query_url = METACPAN_URL + '/release/_search'
        response = requests.post(query_url, json=q)
        response.raise_for_status()

        # Decode the body once and reuse it below.
        result = response.json()
        logging.debug('r.json(): %s', result)

        if result['hits']['total'] == 0:
            download_url = self.search_for_url_match(dist_name, dist_version)
            if download_url is None:
                raise GenerateLorryException(NO_RELEASE_ERRMSG
                                             % (dist_name, dist_version))
            return download_url

        try:
            return result['hits']['hits'][0]['fields']['download_url']
        except (KeyError, IndexError):
            # IndexError: a non-zero total with an empty hit list is just
            # as unexpected as a hit without the requested field.
            raise GenerateLorryException(NO_DOWNLOAD_URL_ERRMSG
                                         % (dist_name, dist_version))

    def _pathname_from_metadata(self, dist_name):
        """Return the pathname recorded for `dist_name`, or None.

        Reads the JSON file named by the IMPORT_METAPATH environment
        variable and looks up ['cpan']['dist-meta'][dist_name]['pathname'].
        An unset variable, an unreadable or malformed file, or a missing
        entry all yield None so that the caller can query metacpan instead.
        """
        metadata_path = os.environ.get('IMPORT_METAPATH')
        if not metadata_path:
            return None

        logging.debug('Got metadata path: %s', metadata_path)
        try:
            with open(metadata_path) as f:
                metadata = json.load(f)
            pathname = metadata['cpan']['dist-meta'][dist_name]['pathname']
        except (KeyError, TypeError, ValueError, IOError):
            # Expected whenever the metadata lacks the pathname, so keep
            # the traceback at debug level instead of reporting an error.
            logging.debug('Following exception can be safely ignored',
                          exc_info=True)
            return None

        logging.debug('got pathname: %s', pathname)
        return pathname

    def process_args(self, args):
        """Print a tarball lorry for the distribution named in `args`.

        `args` is [NAME] or [NAME, VERSION].

        Raises ImportException for a wrong number of arguments and
        GenerateLorryException when the tarball URL cannot be determined.
        """
        if len(args) not in (1, 2):
            raise ImportException('usage: %s NAME [VERSION]' % sys.argv[0])

        dist_name = args[0]
        dist_version = args[1] if len(args) == 2 else None

        logging.info('Generating tarball lorry')

        # metadata passed through IMPORT_METAPATH may already contain
        # the distribution pathname, which we can use for lorrying,
        # if this is the case then there's no need to query metacpan.
        pathname = self._pathname_from_metadata(dist_name)

        if pathname:
            logging.debug("Taking pathname `%s' from parent metadata",
                          pathname)
            url = 'http://cpan.metacpan.org/authors/id/%s' % pathname
        else:
            logging.debug("Querying metacpan for lorry details for %s %s",
                          dist_name, dist_version)
            try:
                url = self.get_tarball_url(dist_name, dist_version)
            except requests.exceptions.RequestException as e:
                # The exception does not always carry the request, so do
                # not let the error report itself fail on a missing one.
                request = getattr(e, 'request', None)
                request_url = getattr(request, 'url', None) or 'unknown url'
                raise GenerateLorryException('got %s while fetching %s'
                                             % (e, request_url))

        lorry = utils.str_tarball_lorry('cpan', 'cpan', dist_name, url)
        print(lorry)


# Script entry point.  run() is not defined in this file; presumably it is
# inherited from ImportExtension and ends up calling process_args -- confirm.
if __name__ == '__main__':
    generator = CPANLorryGenerator()
    generator.run()