summaryrefslogtreecommitdiff
path: root/baserockimport/exts/rubygems.to_lorry
blob: 04132044e1734cf1d888a08f902611878716b247 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
#!/usr/bin/python
#
# Create a Baserock .lorry file for a given RubyGem
#
# Copyright (C) 2014  Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.


import requests
import requests_cache
import yaml

import logging
import json
import os
import sys
import urlparse

from importer_base import ImportException, ImportExtension


class GenerateLorryException(ImportException):
    '''Raised when a .lorry file cannot be generated for a RubyGem.'''


class RubyGemsWebServiceClient(object):
    '''Minimal client for the rubygems.org JSON web service.'''

    def __init__(self):
        # Save hammering the rubygems.org API: 'requests' API calls are
        # transparently cached in an SQLite database, instead.
        requests_cache.install_cache('rubygems_api_cache')

    def _request(self, url):
        '''Fetch 'url' and return its body decoded from JSON.

        Raises GenerateLorryException if the response is not OK.

        '''
        r = requests.get(url)
        if not r.ok:
            raise GenerateLorryException(
                'Request to %s failed: %s' % (r.url, r.reason))
        return json.loads(r.text)

    def get_gem_info(self, gem_name):
        '''Return the info dict that rubygems.org holds for 'gem_name'.

        Raises GenerateLorryException if the request fails, or if the
        'name' field of the returned info is not 'gem_name'.

        '''
        info = self._request(
            'http://rubygems.org/api/v1/gems/%s.json' % gem_name)

        if info['name'] != gem_name:
            # Sanity check. Both values must be passed to '%' as one tuple:
            # otherwise the format string is short of an argument and a
            # TypeError is raised instead of this exception.
            raise GenerateLorryException(
                'Received info for Gem "%s", requested "%s"' %
                (info['name'], gem_name))

        return info


class RubyGemLorryGenerator(ImportExtension):
    def __init__(self):
        '''Load the lorry prefix and known source URIs from local metadata.

        Reads 'rubygems.yaml' (located with self.local_data_path()) and
        keeps two of its entries:

          - 'lorry-prefix': string prepended to every generated lorry name.
          - 'known-source-uris': mapping of Gem name to source repo URI,
            consulted before the metadata served by rubygems.org.

        '''
        super(RubyGemLorryGenerator, self).__init__()

        with open(self.local_data_path('rubygems.yaml'), 'r') as f:
            # safe_load() needs no explicit Loader (plain yaml.load() is
            # deprecated without one) and never builds arbitrary objects.
            # NOTE(review): assumes the file holds only plain YAML
            # mappings and strings -- confirm.
            local_data = yaml.safe_load(f)

        self.lorry_prefix = local_data['lorry-prefix']
        # An empty 'known-source-uris:' key loads as None; treat it as an
        # empty table so len() and 'in' keep working.
        self.known_source_uris = local_data['known-source-uris'] or {}

        logging.debug(
            "Loaded %i known source URIs from local metadata.",
            len(self.known_source_uris))

    def process_args(self, args):
        if len(args) not in [1, 2]:
            raise ImportException(
                'Please call me with the name of a RubyGem as an argument'
                ' and optionally its version number (format: NAME [VERSION])')

        gem_name = args[0]

        lorry = self.generate_lorry_for_gem(gem_name)
        self.write_lorry(sys.stdout, lorry)

    def find_upstream_repo_for_gem(self, gem_name, gem_info):
        source_code_uri = gem_info['source_code_uri']

        if gem_name in self.known_source_uris:
            logging.debug('Found %s in known-source-uris', gem_name)
            known_uri = self.known_source_uris[gem_name]
            if source_code_uri is not None and known_uri != source_code_uri:
                sys.stderr.write(
                    '%s: Hardcoded source URI %s doesn\'t match spec URI %s\n' %
                    (gem_name, known_uri, source_code_uri))
            return known_uri

        if source_code_uri is not None and len(source_code_uri) > 0:
            logging.debug('Got source_code_uri %s', source_code_uri)
            if source_code_uri.endswith('/tree'):
                source_code_uri = source_code_uri[:-len('/tree')]

            return source_code_uri

        homepage_uri = gem_info['homepage_uri']
        if homepage_uri is not None and len(homepage_uri) > 0:
            logging.debug('Got homepage_uri %s', source_code_uri)
            uri = self.detect_source_code_uri_from_homepage(homepage_uri)
            if uri is not None:
                return uri

        # Further possible leads on locating source code.
        # http://ruby-toolbox.com/projects/$gemname -> sometimes contains an
        #   upstream link, even if the gem info does not.
        # https://github.com/search?q=$gemname -> often the first result is
        #   the correct one, but you can never know.

        raise GenerateLorryException(
            "Gem metadata for '%s' does not point to its source code "
            "repository." % gem_name)

    def detect_source_code_uri_from_homepage(self, homepage_uri):
        '''Try to detect source code location based on homepage_uri.

        It seems common for RubyGem projects to be hosted on Github, to use
        a URL inside their Github project as their homepage, and to not set
        source_code_uri. This heuristic saves the user from manually writing
        .lorry files for such projects.

        Returns '<scheme>://github.com/<owner>/<repo>' if 'homepage_uri'
        is on github.com and its path names at least an owner and a repo.
        Returns None otherwise.

        '''

        uri_parts = urlparse.urlsplit(homepage_uri)
        scheme, netloc = uri_parts[0:2]

        if netloc != 'github.com':
            return None

        # Drop empty components, so that a leading, trailing or doubled
        # slash (e.g. 'https://github.com/user/') is not counted as a name.
        path_parts = [part for part in uri_parts[2].split('/') if part]

        if len(path_parts) < 2:
            logging.debug(
                '%s points to Github but not a specific repo.',
                homepage_uri)
            return None

        # Strip off any trailing components, stuff like '/wiki'.
        path = '/'.join(path_parts[0:2])
        uri = '%s://%s/%s' % (scheme, netloc, path)

        logging.debug('Assuming %s is the source code URI.', uri)
        return uri

    def project_name_from_repo(self, repo_url):
        '''Return the project name: the last path component of 'repo_url'.

        Before the last component is taken, each of these suffixes is
        removed at most once, in this order: '/tree/master', '/', '.git'.

        '''
        name = repo_url
        for suffix in ('/tree/master', '/', '.git'):
            if name.endswith(suffix):
                name = name[:-len(suffix)]
        return os.path.split(name)[1]

    def generate_lorry_for_gem(self, gem_name):
        rubygems_client = RubyGemsWebServiceClient()

        gem_info = rubygems_client.get_gem_info(gem_name)

        gem_source_url = self.find_upstream_repo_for_gem(gem_name, gem_info)
        logging.info('Got URL <%s> for %s', gem_source_url, gem_name)

        project_name = self.project_name_from_repo(gem_source_url)
        lorry_name = self.lorry_prefix + project_name

        # One repo may produce multiple Gems. It's up to the caller to merge
        # multiple .lorry files that get generated for the same repo.

        lorry = {
            lorry_name: {
                'type': 'git',
                'url': gem_source_url,
                'x-products-rubygems': [gem_name]
            }
        }

        return lorry

    def write_lorry(self, stream, lorry):
        '''Write 'lorry' to 'stream' as JSON indented by 4 spaces.'''
        text = json.dumps(lorry, indent=4)
        stream.write(text)
        # Needed so the morphlib.extensions code will pick up the last line.
        stream.write('\n')


# Script entry point: build the extension and hand control to run(). run()
# is not defined in this file -- presumably it is inherited from
# ImportExtension and ends up calling process_args(); verify in
# importer_base.
if __name__ == '__main__':
    RubyGemLorryGenerator().run()