#!/usr/bin/python
#
# Create a Baserock .lorry file for a given RubyGem
#
# Copyright (C) 2014 Codethink Limited
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.


import requests
import requests_cache
import yaml

import logging
import json
import os
import sys

try:
    import urlparse
except ImportError:
    # Python 3 moved the 'urlparse' module to 'urllib.parse'. Bind it to the
    # old name so the rest of the file works on either interpreter.
    import urllib.parse as urlparse

from importer_base import ImportException, ImportExtension


class GenerateLorryException(ImportException):
    '''Raised when a .lorry file cannot be generated for a Gem.'''
    pass


class RubyGemsWebServiceClient(object):
    '''Minimal client for the rubygems.org JSON web API.'''

    def __init__(self):
        # Save hammering the rubygems.org API: 'requests' API calls are
        # transparently cached in an SQLite database, instead.
        requests_cache.install_cache('rubygems_api_cache')

    def _request(self, url):
        '''Fetch 'url' and return its body decoded from JSON.

        Raises GenerateLorryException if the response status is not OK.

        '''
        r = requests.get(url)
        if r.ok:
            return json.loads(r.text)
        else:
            raise GenerateLorryException(
                'Request to %s failed: %s' % (r.url, r.reason))

    def get_gem_info(self, gem_name):
        '''Return the rubygems.org metadata dict for 'gem_name'.

        Raises GenerateLorryException if the request fails, or if the
        returned metadata is for a differently named Gem.

        '''
        info = self._request(
            'http://rubygems.org/api/v1/gems/%s.json' % gem_name)

        if info['name'] != gem_name:
            # Sanity check. The format arguments must be a tuple: without
            # the parentheses '%' only receives info['name'] and raises
            # TypeError instead of the intended exception.
            raise GenerateLorryException(
                'Received info for Gem "%s", requested "%s"' %
                (info['name'], gem_name))

        return info


class RubyGemLorryGenerator(ImportExtension):
    '''Import extension that writes a .lorry entry for a named RubyGem.'''

    def __init__(self):
        '''Load the lorry prefix and known source URIs from rubygems.yaml.

        The file is located with self.local_data_path(), which is not
        defined in this file (presumably provided by ImportExtension).

        '''
        super(RubyGemLorryGenerator, self).__init__()

        with open(self.local_data_path('rubygems.yaml'), 'r') as f:
            # safe_load() only builds plain data types, which is all this
            # file needs. Plain yaml.load() can construct arbitrary Python
            # objects and requires an explicit Loader in newer PyYAML.
            local_data = yaml.safe_load(f)

        self.lorry_prefix = local_data['lorry-prefix']
        self.known_source_uris = local_data['known-source-uris']

        logging.debug(
            "Loaded %i known source URIs from local metadata.",
            len(self.known_source_uris))

    def process_args(self, args):
        '''Generate a lorry for the Gem named in 'args' and print it.

        'args' must contain exactly one item, the Gem name; otherwise
        ImportException is raised. The lorry is written to stdout.

        '''
        if len(args) != 1:
            raise ImportException(
                'Please call me with the name of a RubyGem as an argument.')

        gem_name = args[0]

        lorry = self.generate_lorry_for_gem(gem_name)
        self.write_lorry(sys.stdout, lorry)

    def find_upstream_repo_for_gem(self, gem_name, gem_info):
        '''Return the source repository URI for a Gem.

        Sources are tried in this order: the hardcoded entry in
        known-source-uris, the 'source_code_uri' field of the Gem metadata,
        then a guess based on the 'homepage_uri' field.

        Raises GenerateLorryException if none of them gives a URI.

        '''
        # .get() so that metadata lacking the key is treated like a null
        # value and reaches the descriptive error at the end.
        source_code_uri = gem_info.get('source_code_uri')

        if gem_name in self.known_source_uris:
            logging.debug('Found %s in known-source-uris', gem_name)
            known_uri = self.known_source_uris[gem_name]
            if source_code_uri is not None and known_uri != source_code_uri:
                sys.stderr.write(
                    '%s: Hardcoded source URI %s doesn\'t match spec URI %s\n'
                    % (gem_name, known_uri, source_code_uri))
            return known_uri

        if source_code_uri:
            logging.debug('Got source_code_uri %s', source_code_uri)
            if source_code_uri.endswith('/tree'):
                source_code_uri = source_code_uri[:-len('/tree')]

            return source_code_uri

        homepage_uri = gem_info.get('homepage_uri')
        if homepage_uri:
            logging.debug('Got homepage_uri %s', homepage_uri)
            uri = self.detect_source_code_uri_from_homepage(homepage_uri)
            if uri is not None:
                return uri

        # Further possible leads on locating source code.
        # http://ruby-toolbox.com/projects/$gemname -> sometimes contains an
        #   upstream link, even if the gem info does not.
        # https://github.com/search?q=$gemname -> often the first result is
        #   the correct one, but you can never know.

        raise GenerateLorryException(
            "Gem metadata for '%s' does not point to its source code "
            "repository." % gem_name)

    def detect_source_code_uri_from_homepage(self, homepage_uri):
        '''Try to detect source code location based on homepage_uri.

        It seems common for RubyGem projects to be hosted on Github, and for
        them to use link to a URL inside their Github project as their
        homepage, and for them to not set source_code_uri. This heuristic
        saves the user from manually writing .lorry files for such projects.

        Returns the guessed repository URI, or None if 'homepage_uri' is
        not on github.com or does not name both an owner and a repo.

        '''

        uri_parts = urlparse.urlsplit(homepage_uri)
        scheme, netloc = uri_parts[0:2]

        if netloc != 'github.com':
            return None

        # Drop empty components so that a trailing or doubled slash (such
        # as 'https://github.com/user/') is not mistaken for a repo name.
        path_parts = [part for part in uri_parts[2].split('/') if part]

        if len(path_parts) < 2:
            logging.debug(
                '%s points to Github but not a specific repo.',
                homepage_uri)
            return None

        # Strip off any trailing components, stuff like '/wiki'.
        path = '/'.join(path_parts[0:2])
        uri = '%s://%s/%s' % (scheme, netloc, path)

        logging.debug('Assuming %s is the source code URI.', uri)
        return uri

    def project_name_from_repo(self, repo_url):
        '''Return the last path component of 'repo_url' as a project name.

        A trailing '/tree/master', then a trailing '/', then a trailing
        '.git' are removed (in that order) before taking the basename.

        '''
        if repo_url.endswith('/tree/master'):
            repo_url = repo_url[:-len('/tree/master')]
        if repo_url.endswith('/'):
            repo_url = repo_url[:-1]
        if repo_url.endswith('.git'):
            repo_url = repo_url[:-len('.git')]
        return os.path.basename(repo_url)

    def generate_lorry_for_gem(self, gem_name):
        '''Return a dict describing the lorry for the Gem 'gem_name'.

        The dict has one key, the lorry prefix followed by the project
        name, whose value records the repo type, URL and Gem name.

        '''
        rubygems_client = RubyGemsWebServiceClient()

        gem_info = rubygems_client.get_gem_info(gem_name)

        gem_source_url = self.find_upstream_repo_for_gem(gem_name, gem_info)
        logging.info('Got URL <%s> for %s', gem_source_url, gem_name)

        project_name = self.project_name_from_repo(gem_source_url)
        lorry_name = self.lorry_prefix + project_name

        # One repo may produce multiple Gems. It's up to the caller to merge
        # multiple .lorry files that get generated for the same repo.

        lorry = {
            lorry_name: {
                'type': 'git',
                'url': gem_source_url,
                'x-products-rubygems': [gem_name]
            }
        }

        return lorry

    def write_lorry(self, stream, lorry):
        '''Write 'lorry' to 'stream' as indented JSON plus a newline.'''
        json.dump(lorry, stream, indent=4)
        # Needed so the morphlib.extensions code will pick up the last line.
        stream.write('\n')


if __name__ == '__main__':
    # run() is not defined in this file; presumably inherited from
    # ImportExtension.
    RubyGemLorryGenerator().run()