diff options
author | Robert Speicher <robert@gitlab.com> | 2017-08-07 19:27:42 +0000 |
---|---|---|
committer | Robert Speicher <robert@gitlab.com> | 2017-08-07 19:27:42 +0000 |
commit | dd3d422193c5f8d11ca15d3dac2ac33e55736789 (patch) | |
tree | e278567403e6855e4913822bdc2781c1afc0649b | |
parent | 4c8b666893aa9f5ab94b800489fd9e3240ad4f5e (diff) | |
parent | 38704e4247616a3122108b01b61798eed21461c9 (diff) | |
download | gitlab-ce-dd3d422193c5f8d11ca15d3dac2ac33e55736789.tar.gz |
Merge branch 'github' into 'master'
Improve GitHub importer
Closes #27429
See merge request !12886
-rw-r--r-- | app/services/projects/import_service.rb | 8 | ||||
-rw-r--r-- | changelogs/unreleased/github.yml | 4 | ||||
-rw-r--r-- | lib/github/client.rb | 36 | ||||
-rw-r--r-- | lib/github/import.rb | 34 | ||||
-rw-r--r-- | lib/gitlab/import_sources.rb | 2 | ||||
-rw-r--r-- | lib/tasks/import.rake | 3 | ||||
-rw-r--r-- | spec/lib/gitlab/import_sources_spec.rb | 2 | ||||
-rw-r--r-- | spec/services/projects/import_service_spec.rb | 32 |
8 files changed, 74 insertions, 47 deletions
diff --git a/app/services/projects/import_service.rb b/app/services/projects/import_service.rb index 50ec3651515..c3bf0031409 100644 --- a/app/services/projects/import_service.rb +++ b/app/services/projects/import_service.rb @@ -34,8 +34,12 @@ module Projects def import_repository raise Error, 'Blocked import URL.' if Gitlab::UrlBlocker.blocked_url?(project.import_url) + # We should return early for a GitHub import because the new GitHub + # importer fetch the project repositories for us. + return if project.github_import? + begin - if project.github_import? || project.gitea_import? + if project.gitea_import? fetch_repository else clone_repository @@ -55,7 +59,7 @@ module Projects end def fetch_repository - project.create_repository + project.ensure_repository project.repository.add_remote(project.import_type, project.import_url) project.repository.set_remote_as_mirror(project.import_type) project.repository.fetch_remote(project.import_type, forced: true) diff --git a/changelogs/unreleased/github.yml b/changelogs/unreleased/github.yml new file mode 100644 index 00000000000..585b9b13b65 --- /dev/null +++ b/changelogs/unreleased/github.yml @@ -0,0 +1,4 @@ +--- +title: Reduce memory usage of the GitHub importer +merge_request: 12886 +author: diff --git a/lib/github/client.rb b/lib/github/client.rb index e65d908d232..9c476df7d46 100644 --- a/lib/github/client.rb +++ b/lib/github/client.rb @@ -1,13 +1,16 @@ module Github class Client + TIMEOUT = 60 + attr_reader :connection, :rate_limit def initialize(options) - @connection = Faraday.new(url: options.fetch(:url)) do |faraday| - faraday.options.open_timeout = options.fetch(:timeout, 60) - faraday.options.timeout = options.fetch(:timeout, 60) + @connection = Faraday.new(url: options.fetch(:url, root_endpoint)) do |faraday| + faraday.options.open_timeout = options.fetch(:timeout, TIMEOUT) + faraday.options.timeout = options.fetch(:timeout, TIMEOUT) faraday.authorization 'token', options.fetch(:token) faraday.adapter :net_http + faraday.ssl.verify = verify_ssl end @rate_limit = RateLimit.new(connection) @@ -19,5 +22,32 @@ module Github Github::Response.new(connection.get(url, query)) end + + private + + def root_endpoint + custom_endpoint || github_endpoint + end + + def custom_endpoint + github_omniauth_provider.dig('args', 'client_options', 'site') + end + + def verify_ssl + # If there is no config, we're connecting to github.com + # and we should verify ssl. + github_omniauth_provider.fetch('verify_ssl', true) + end + + def github_endpoint + OmniAuth::Strategies::GitHub.default_options[:client_options][:site] + end + + def github_omniauth_provider + @github_omniauth_provider ||= + Gitlab.config.omniauth.providers + .find { |provider| provider.name == 'github' } + .to_h + end end end diff --git a/lib/github/import.rb b/lib/github/import.rb index cea4be5460b..4cc01593ef4 100644 --- a/lib/github/import.rb +++ b/lib/github/import.rb @@ -41,13 +41,16 @@ module Github self.reset_callbacks :validate end - attr_reader :project, :repository, :repo, :options, :errors, :cached, :verbose + attr_reader :project, :repository, :repo, :repo_url, :wiki_url, + :options, :errors, :cached, :verbose - def initialize(project, options) + def initialize(project, options = {}) @project = project @repository = project.repository @repo = project.import_source - @options = options + @repo_url = project.import_url + @wiki_url = project.import_url.sub(/\.git\z/, '.wiki.git') + @options = options.reverse_merge(token: project.import_data&.credentials&.fetch(:user)) @verbose = options.fetch(:verbose, false) @cached = Hash.new { |hash, key| hash[key] = Hash.new } @errors = [] @@ -65,6 +68,8 @@ module Github fetch_pull_requests puts 'Fetching issues...'.color(:aqua) if verbose fetch_issues + puts 'Fetching releases...'.color(:aqua) if verbose + fetch_releases puts 'Cloning wiki repository...'.color(:aqua) if verbose fetch_wiki_repository puts 'Expiring repository cache...'.color(:aqua) if verbose @@ -72,6 +77,7 @@ module Github true rescue Github::RepositoryFetchError + expire_repository_cache false ensure keep_track_of_errors @@ -81,23 +87,21 @@ module Github def fetch_repository begin - project.create_repository unless project.repository.exists? - project.repository.add_remote('github', "https://#{options.fetch(:token)}@github.com/#{repo}.git") + project.ensure_repository + project.repository.add_remote('github', repo_url) project.repository.set_remote_as_mirror('github') project.repository.fetch_remote('github', forced: true) - rescue Gitlab::Shell::Error => e - error(:project, "https://github.com/#{repo}.git", e.message) + rescue Gitlab::Git::Repository::NoRepository, Gitlab::Shell::Error => e + error(:project, repo_url, e.message) raise Github::RepositoryFetchError end end def fetch_wiki_repository - wiki_url = "https://#{options.fetch(:token)}@github.com/#{repo}.wiki.git" - wiki_path = "#{project.full_path}.wiki" + return if project.wiki.repository_exists? - unless project.wiki.repository_exists? - gitlab_shell.import_repository(project.repository_storage_path, wiki_path, wiki_url) - end + wiki_path = "#{project.disk_path}.wiki" + gitlab_shell.import_repository(project.repository_storage_path, wiki_path, wiki_url) rescue Gitlab::Shell::Error => e # GitHub error message when the wiki repo has not been created, # this means that repo has wiki enabled, but have no pages. So, @@ -309,7 +313,7 @@ module Github next unless representation.valid? release = ::Release.find_or_initialize_by(project_id: project.id, tag: representation.tag) - next unless relese.new_record? + next unless release.new_record? begin release.description = representation.description @@ -337,7 +341,7 @@ module Github def user_id(user, fallback_id = nil) return unless user.present? - return cached[:user_ids][user.id] if cached[:user_ids].key?(user.id) + return cached[:user_ids][user.id] if cached[:user_ids][user.id].present? gitlab_user_id = user_id_by_external_uid(user.id) || user_id_by_email(user.email) @@ -367,7 +371,7 @@ module Github end def expire_repository_cache - repository.expire_content_cache + repository.expire_content_cache if project.repository_exists? end def keep_track_of_errors diff --git a/lib/gitlab/import_sources.rb b/lib/gitlab/import_sources.rb index 52276cbcd9a..5404dc11a87 100644 --- a/lib/gitlab/import_sources.rb +++ b/lib/gitlab/import_sources.rb @@ -8,7 +8,7 @@ module Gitlab ImportSource = Struct.new(:name, :title, :importer) ImportTable = [ - ImportSource.new('github', 'GitHub', Gitlab::GithubImport::Importer), + ImportSource.new('github', 'GitHub', Github::Import), ImportSource.new('bitbucket', 'Bitbucket', Gitlab::BitbucketImport::Importer), ImportSource.new('gitlab', 'GitLab.com', Gitlab::GitlabImport::Importer), ImportSource.new('google_code', 'Google Code', Gitlab::GoogleCodeImport::Importer), diff --git a/lib/tasks/import.rake b/lib/tasks/import.rake index 50b8e331469..96b8f59242c 100644 --- a/lib/tasks/import.rake +++ b/lib/tasks/import.rake @@ -7,7 +7,7 @@ class GithubImport end def initialize(token, gitlab_username, project_path, extras) - @options = { url: 'https://api.github.com', token: token, verbose: true } + @options = { token: token, verbose: true } @project_path = project_path @current_user = User.find_by_username(gitlab_username) @github_repo = extras.empty? ? nil : extras.first @@ -62,6 +62,7 @@ class GithubImport visibility_level: visibility_level, import_type: 'github', import_source: @repo['full_name'], + import_url: @repo['clone_url'].sub('://', "://#{@options[:token]}@"), skip_wiki: @repo['has_wiki'] ).execute end diff --git a/spec/lib/gitlab/import_sources_spec.rb b/spec/lib/gitlab/import_sources_spec.rb index b3b5e5e7e33..c5725f47453 100644 --- a/spec/lib/gitlab/import_sources_spec.rb +++ b/spec/lib/gitlab/import_sources_spec.rb @@ -56,7 +56,7 @@ describe Gitlab::ImportSources do describe '.importer' do import_sources = { - 'github' => Gitlab::GithubImport::Importer, + 'github' => Github::Import, 'bitbucket' => Gitlab::BitbucketImport::Importer, 'gitlab' => Gitlab::GitlabImport::Importer, 'google_code' => Gitlab::GoogleCodeImport::Importer, diff --git a/spec/services/projects/import_service_spec.rb b/spec/services/projects/import_service_spec.rb index c0ab1ea704d..034065aab00 100644 --- a/spec/services/projects/import_service_spec.rb +++ b/spec/services/projects/import_service_spec.rb @@ -38,8 +38,7 @@ describe Projects::ImportService do context 'with a Github repository' do it 'succeeds if repository import is successfully' do - expect_any_instance_of(Repository).to receive(:fetch_remote).and_return(true) - expect_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_return(true) + expect_any_instance_of(Github::Import).to receive(:execute).and_return(true) result = subject.execute @@ -52,16 +51,7 @@ describe Projects::ImportService do result = subject.execute expect(result[:status]).to eq :error - expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.full_path} - Failed to import the repository" - end - - it 'does not remove the GitHub remote' do - expect_any_instance_of(Repository).to receive(:fetch_remote).and_return(true) - expect_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_return(true) - - subject.execute - - expect(project.repository.raw_repository.remote_names).to include('github') + expect(result[:message]).to eq "Error importing repository #{project.import_url} into #{project.path_with_namespace} - The remote data could not be imported." end end @@ -102,8 +92,7 @@ describe Projects::ImportService do end it 'succeeds if importer succeeds' do - allow_any_instance_of(Repository).to receive(:fetch_remote).and_return(true) - allow_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_return(true) + allow_any_instance_of(Github::Import).to receive(:execute).and_return(true) result = subject.execute @@ -111,10 +100,7 @@ describe Projects::ImportService do end it 'flushes various caches' do - allow_any_instance_of(Repository).to receive(:fetch_remote) - .and_return(true) - - allow_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute) + allow_any_instance_of(Github::Import).to receive(:execute) .and_return(true) expect_any_instance_of(Repository).to receive(:expire_content_cache) @@ -123,8 +109,7 @@ describe Projects::ImportService do end it 'fails if importer fails' do - allow_any_instance_of(Repository).to receive(:fetch_remote).and_return(true) - allow_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_return(false) + allow_any_instance_of(Github::Import).to receive(:execute).and_return(false) result = subject.execute @@ -133,8 +118,7 @@ describe Projects::ImportService do end it 'fails if importer raise an error' do - allow_any_instance_of(Gitlab::Shell).to receive(:fetch_remote).and_return(true) - allow_any_instance_of(Gitlab::GithubImport::Importer).to receive(:execute).and_raise(Projects::ImportService::Error.new('Github: failed to connect API')) + allow_any_instance_of(Github::Import).to receive(:execute).and_raise(Projects::ImportService::Error.new('Github: failed to connect API')) result = subject.execute @@ -143,9 +127,9 @@ describe Projects::ImportService do end it 'expires content cache after error' do - allow_any_instance_of(Project).to receive(:repository_exists?).and_return(false, true) + allow_any_instance_of(Project).to receive(:repository_exists?).and_return(false) - expect_any_instance_of(Gitlab::Shell).to receive(:fetch_remote).and_raise(Gitlab::Shell::Error.new('Failed to import the repository')) + expect_any_instance_of(Repository).to receive(:fetch_remote).and_raise(Gitlab::Shell::Error.new) expect_any_instance_of(Repository).to receive(:expire_content_cache) subject.execute |