48 files changed, 2239 insertions, 970 deletions
diff --git a/lib/gitlab/github_import/base_formatter.rb b/lib/gitlab/github_import/base_formatter.rb
deleted file mode 100644
index f330041cc00..00000000000
--- a/lib/gitlab/github_import/base_formatter.rb
+++ /dev/null
@@ -1,26 +0,0 @@
-module Gitlab
-  module GithubImport
-    class BaseFormatter
-      attr_reader :client, :formatter, :project, :raw_data
-
-      def initialize(project, raw_data, client = nil)
-        @project = project
-        @raw_data = raw_data
-        @client = client
-        @formatter = Gitlab::ImportFormatter.new
-      end
-
-      def create!
-        association = project.public_send(project_association) # rubocop:disable GitlabSecurity/PublicSend
-
-        association.find_or_create_by!(find_condition) do |record|
-          record.attributes = attributes
-        end
-      end
-
-      def url
-        raw_data.url || ''
-      end
-    end
-  end
-end
diff --git a/lib/gitlab/github_import/branch_formatter.rb b/lib/gitlab/github_import/branch_formatter.rb
deleted file mode 100644
index 8aa885fb811..00000000000
--- a/lib/gitlab/github_import/branch_formatter.rb
+++ /dev/null
@@ -1,37 +0,0 @@
-module Gitlab
-  module GithubImport
-    class BranchFormatter < BaseFormatter
-      delegate :repo, :sha, :ref, to: :raw_data
-
-      def exists?
-        branch_exists? && commit_exists?
-      end
-
-      def valid?
-        sha.present? && ref.present?
-      end
-
-      def user
-        raw_data.user&.login || 'unknown'
-      end
-
-      def short_sha
-        Commit.truncate_sha(sha)
-      end
-
-      private
-
-      def branch_exists?
-        project.repository.branch_exists?(ref)
-      end
-
-      def commit_exists?
-        project.repository.branch_names_contains(sha).include?(ref)
-      end
-
-      def short_id
-        sha.to_s[0..7]
-      end
-    end
-  end
-end
diff --git a/lib/gitlab/github_import/bulk_importing.rb b/lib/gitlab/github_import/bulk_importing.rb
new file mode 100644
index 00000000000..147597289cf
--- /dev/null
+++ b/lib/gitlab/github_import/bulk_importing.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    module BulkImporting
+      # Builds and returns an Array of objects to bulk insert into the
+      # database.
+      #
+      # enum - An Enumerable that returns the objects to turn into database
+      #        rows.
+      def build_database_rows(enum)
+        enum.each_with_object([]) do |(object, _), rows|
+          rows << build(object) unless already_imported?(object)
+        end
+      end
+
+      # Bulk inserts the given rows into the database.
+      def bulk_insert(model, rows, batch_size: 100)
+        rows.each_slice(batch_size) do |slice|
+          Gitlab::Database.bulk_insert(model.table_name, slice)
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/caching.rb b/lib/gitlab/github_import/caching.rb
new file mode 100644
index 00000000000..b08f133794f
--- /dev/null
+++ b/lib/gitlab/github_import/caching.rb
@@ -0,0 +1,151 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    module Caching
+      # The default timeout of the cache keys.
+      TIMEOUT = 24.hours.to_i
+
+      WRITE_IF_GREATER_SCRIPT = <<-EOF.strip_heredoc.freeze
+      local key, value, ttl = KEYS[1], tonumber(ARGV[1]), ARGV[2]
+      local existing = tonumber(redis.call("get", key))
+
+      if existing == nil or value > existing then
+        redis.call("set", key, value)
+        redis.call("expire", key, ttl)
+        return true
+      else
+        return false
+      end
+      EOF
+
+      # Reads a cache key.
+      #
+      # If the key exists and has a non-empty value its TTL is refreshed
+      # automatically.
+      #
+      # raw_key - The cache key to read.
+      # timeout - The new timeout of the key if the key is to be refreshed.
+      def self.read(raw_key, timeout: TIMEOUT)
+        key = cache_key_for(raw_key)
+        value = Redis::Cache.with { |redis| redis.get(key) }
+
+        if value.present?
+          # We refresh the expiration time so frequently used keys stick
+          # around, removing the need for querying the database as much as
+          # possible.
+          #
+          # A key may be empty when we looked up a GitHub user (for example) but
+          # did not find a matching GitLab user. In that case we _don't_ want to
+          # refresh the TTL so we automatically pick up the right data when said
+          # user were to register themselves on the GitLab instance.
+          Redis::Cache.with { |redis| redis.expire(key, timeout) }
+        end
+
+        value
+      end
+
+      # Reads an integer from the cache, or returns nil if no value was found.
+      #
+      # See Caching.read for more information.
+      def self.read_integer(raw_key, timeout: TIMEOUT)
+        value = read(raw_key, timeout: timeout)
+
+        value.to_i if value.present?
+      end
+
+      # Sets a cache key to the given value.
+      #
+      # raw_key - The cache key to write.
+      # value - The value to set.
+      # timeout - The time after which the cache key should expire.
+      def self.write(raw_key, value, timeout: TIMEOUT)
+        key = cache_key_for(raw_key)
+
+        Redis::Cache.with do |redis|
+          redis.set(key, value, ex: timeout)
+        end
+
+        value
+      end
+
+      # Adds a value to a set.
+      #
+      # raw_key - The key of the set to add the value to.
+      # value - The value to add to the set.
+      # timeout - The new timeout of the key.
+      def self.set_add(raw_key, value, timeout: TIMEOUT)
+        key = cache_key_for(raw_key)
+
+        Redis::Cache.with do |redis|
+          redis.multi do |m|
+            m.sadd(key, value)
+            m.expire(key, timeout)
+          end
+        end
+      end
+
+      # Returns true if the given value is present in the set.
+      #
+      # raw_key - The key of the set to check.
+      # value - The value to check for.
+      def self.set_includes?(raw_key, value)
+        key = cache_key_for(raw_key)
+
+        Redis::Cache.with do |redis|
+          redis.sismember(key, value)
+        end
+      end
+
+      # Sets multiple keys to a given value.
+      #
+      # mapping - A Hash mapping the cache keys to their respective values.
+      # timeout - The time after which the cache key should expire.
+      def self.write_multiple(mapping, timeout: TIMEOUT)
+        Redis::Cache.with do |redis|
+          redis.multi do |multi|
+            mapping.each do |raw_key, value|
+              multi.set(cache_key_for(raw_key), value, ex: timeout)
+            end
+          end
+        end
+      end
+
+      # Sets the expiration time of a key.
+      #
+      # raw_key - The key for which to change the timeout.
+      # timeout - The new timeout.
+      def self.expire(raw_key, timeout)
+        key = cache_key_for(raw_key)
+
+        Redis::Cache.with do |redis|
+          redis.expire(key, timeout)
+        end
+      end
+
+      # Sets a key to the given integer, but only if the key is unset or its
+      # existing value is smaller than the given value.
+      #
+      # This method uses a Lua script to ensure the read and write are atomic.
+      #
+      # raw_key - The key to set.
+      # value - The new value for the key.
+      # timeout - The key timeout in seconds.
+      #
+      # Returns true when the key was overwritten, false otherwise.
+      def self.write_if_greater(raw_key, value, timeout: TIMEOUT)
+        key = cache_key_for(raw_key)
+        val = Redis::Cache.with do |redis|
+          redis
+            .eval(WRITE_IF_GREATER_SCRIPT, keys: [key], argv: [value, timeout])
+        end
+
+        val ? true : false
+      end
+
+      def self.cache_key_for(raw_key)
+        "#{Redis::Cache::CACHE_NAMESPACE}:#{raw_key}"
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/client.rb b/lib/gitlab/github_import/client.rb
index 0550f9695bd..844530b1ea7 100644
--- a/lib/gitlab/github_import/client.rb
+++ b/lib/gitlab/github_import/client.rb
@@ -1,147 +1,184 @@
+# frozen_string_literal: true
+
 module Gitlab
   module GithubImport
+    # HTTP client for interacting with the GitHub API.
+    #
+    # This class is basically a fancy wrapper around Octokit while adding some
+    # functionality to deal with rate limiting and parallel imports. Usage is
+    # mostly the same as Octokit, for example:
+    #
+    #     client = GithubImport::Client.new('hunter2')
+    #
+    #     client.labels.each do |label|
+    #       puts label.name
+    #     end
     class Client
-      GITHUB_SAFE_REMAINING_REQUESTS = 100
-      GITHUB_SAFE_SLEEP_TIME = 500
+      attr_reader :octokit
 
-      attr_reader :access_token, :host, :api_version
+      # A single page of data and the corresponding page number.
+      Page = Struct.new(:objects, :number)
 
-      def initialize(access_token, host: nil, api_version: 'v3')
-        @access_token = access_token
-        @host = host.to_s.sub(%r{/+\z}, '')
-        @api_version = api_version
-        @users = {}
+      # The minimum number of requests we want to keep available.
+      #
+      # We don't use a value of 0 as multiple threads may be using the same
+      # token in parallel. This could result in all of them hitting the GitHub
+      # rate limit at once. The threshold is put in place to not hit the limit
+      # in most cases.
+      RATE_LIMIT_THRESHOLD = 50
 
-        if access_token
-          ::Octokit.auto_paginate = false
-        end
+      # token - The GitHub API token to use.
+      #
+      # per_page - The number of objects that should be displayed per page.
+      #
+      # parallel - When set to true hitting the rate limit will result in a
+      #            dedicated error being raised. When set to `false` we will
+      #            instead just `sleep()` until the rate limit is reset. Setting
+      #            this value to `true` for parallel importing is crucial as
+      #            otherwise hitting the rate limit will result in a thread
+      #            being blocked in a `sleep()` call for up to an hour.
+      def initialize(token, per_page: 100, parallel: true)
+        @octokit = Octokit::Client.new(access_token: token, per_page: per_page)
+        @parallel = parallel
       end
 
-      def api
-        @api ||= ::Octokit::Client.new(
-          access_token: access_token,
-          api_endpoint: api_endpoint,
-          # If there is no config, we're connecting to github.com and we
-          # should verify ssl.
-          connection_options: {
-            ssl: { verify: config ? config['verify_ssl'] : true }
-          }
-        )
+      def parallel?
+        @parallel
       end
 
-      def client
-        unless config
-          raise Projects::ImportService::Error,
-            'OAuth configuration for GitHub missing.'
-        end
-
-        @client ||= ::OAuth2::Client.new(
-          config.app_id,
-          config.app_secret,
-          github_options.merge(ssl: { verify: config['verify_ssl'] })
-        )
+      # Returns the details of a GitHub user.
+      #
+      # username - The username of the user.
+      def user(username)
+        with_rate_limit { octokit.user(username) }
       end
 
-      def authorize_url(redirect_uri)
-        client.auth_code.authorize_url({
-          redirect_uri: redirect_uri,
-          scope: "repo, user, user:email"
-        })
+      # Returns the details of a GitHub repository.
+      #
+      # name - The path (in the form `owner/repository`) of the repository.
+      def repository(name)
+        with_rate_limit { octokit.repo(name) }
       end
 
-      def get_token(code)
-        client.auth_code.get_token(code).token
+      def labels(*args)
+        each_object(:labels, *args)
       end
 
-      def method_missing(method, *args, &block)
-        if api.respond_to?(method)
-          request(method, *args, &block)
-        else
-          super(method, *args, &block)
-        end
+      def milestones(*args)
+        each_object(:milestones, *args)
       end
 
-      def respond_to?(method)
-        api.respond_to?(method) || super
+      def releases(*args)
+        each_object(:releases, *args)
       end
 
-      def user(login)
-        return nil unless login.present?
-        return @users[login] if @users.key?(login)
+      # Fetches data from the GitHub API and yields a Page object for every page
+      # of data, without loading all of them into memory.
+      #
+      # method - The Octokit method to use for getting the data.
+      # args - Arguments to pass to the Octokit method.
+      #
+      # rubocop: disable GitlabSecurity/PublicSend
+      def each_page(method, *args, &block)
+        return to_enum(__method__, method, *args) unless block_given?
 
-        @users[login] = api.user(login)
-      end
+        page =
+          if args.last.is_a?(Hash) && args.last[:page]
+            args.last[:page]
+          else
+            1
+          end
 
-      private
+        collection = with_rate_limit { octokit.public_send(method, *args) }
+        next_url = octokit.last_response.rels[:next]
 
-      def api_endpoint
-        if host.present? && api_version.present?
-          "#{host}/api/#{api_version}"
-        else
-          github_options[:site]
+        yield Page.new(collection, page)
+
+        while next_url
+          response = with_rate_limit { next_url.get }
+          next_url = response.rels[:next]
+
+          yield Page.new(response.data, page += 1)
         end
       end
 
-      def config
-        Gitlab.config.omniauth.providers.find { |provider| provider.name == "github" }
-      end
+      # Iterates over all of the objects for the given method (e.g. `:labels`).
+      #
+      # method - The method to send to Octokit for querying data.
+      # args - Any arguments to pass to the Octokit method.
+      def each_object(method, *args, &block)
+        return to_enum(__method__, method, *args) unless block_given?
 
-      def github_options
-        if config
-          config["args"]["client_options"].deep_symbolize_keys
-        else
-          OmniAuth::Strategies::GitHub.default_options[:client_options].symbolize_keys
+        each_page(method, *args) do |page|
+          page.objects.each do |object|
+            yield object
+          end
         end
       end
 
-      def rate_limit
-        api.rate_limit!
-      # GitHub Rate Limit API returns 404 when the rate limit is
-      # disabled. In this case we just want to return gracefully
-      # instead of spitting out an error.
-      rescue Octokit::NotFound
-        nil
-      end
+      # Yields the supplied block, responding to any rate limit errors.
+      #
+      # The exact strategy used for handling rate limiting errors depends on
+      # whether we are running in parallel mode or not. For more information see
+      # `#raise_or_wait_for_rate_limit`.
+      def with_rate_limit
+        request_count_counter.increment
 
-      def has_rate_limit?
-        return @has_rate_limit if defined?(@has_rate_limit)
+        raise_or_wait_for_rate_limit unless requests_remaining?
 
-        @has_rate_limit = rate_limit.present?
-      end
+        begin
+          yield
+        rescue Octokit::TooManyRequests
+          raise_or_wait_for_rate_limit
 
-      def rate_limit_exceed?
-        has_rate_limit? && rate_limit.remaining <= GITHUB_SAFE_REMAINING_REQUESTS
+          # This retry will only happen when running in sequential mode as we'll
+          # raise an error in parallel mode.
+          retry
+        end
       end
 
-      def rate_limit_sleep_time
-        rate_limit.resets_in + GITHUB_SAFE_SLEEP_TIME
+      # Returns `true` if we're still allowed to perform API calls.
+      def requests_remaining?
+        remaining_requests > RATE_LIMIT_THRESHOLD
       end
 
-      def request(method, *args, &block)
-        sleep rate_limit_sleep_time if rate_limit_exceed?
-
-        data = api.__send__(method, *args) # rubocop:disable GitlabSecurity/PublicSend
-        return data unless data.is_a?(Array)
+      def remaining_requests
+        octokit.rate_limit.remaining
+      end
 
-        last_response = api.last_response
+      def raise_or_wait_for_rate_limit
+        rate_limit_counter.increment
 
-        if block_given?
-          yield data
-          # api.last_response could change while we're yielding (e.g. fetching labels for each PR)
-          # so we cache our own last response
-          each_response_page(last_response, &block)
+        if parallel?
+          raise RateLimitError
         else
-          each_response_page(last_response) { |page| data.concat(page) }
-          data
+          sleep(rate_limit_resets_in)
         end
       end
 
-      def each_response_page(last_response)
-        while last_response.rels[:next]
-          sleep rate_limit_sleep_time if rate_limit_exceed?
-          last_response = last_response.rels[:next].get
-          yield last_response.data if last_response.data.is_a?(Array)
-        end
+      def rate_limit_resets_in
+        # We add a few seconds to the rate limit so we don't _immediately_
+        # resume when the rate limit resets as this may result in us performing
+        # a request before GitHub has a chance to reset the limit.
+        octokit.rate_limit.resets_in + 5
+      end
+
+      def respond_to_missing?(method, include_private = false)
+        octokit.respond_to?(method, include_private)
+      end
+
+      def rate_limit_counter
+        @rate_limit_counter ||= Gitlab::Metrics.counter(
+          :github_importer_rate_limit_hits,
+          'The number of times we hit the GitHub rate limit when importing projects'
+        )
+      end
+
+      def request_count_counter
+        @request_counter ||= Gitlab::Metrics.counter(
+          :github_importer_request_count,
+          'The number of GitHub API calls performed when importing projects'
+        )
       end
     end
   end
diff --git a/lib/gitlab/github_import/comment_formatter.rb b/lib/gitlab/github_import/comment_formatter.rb
deleted file mode 100644
index 8911b81ec9a..00000000000
--- a/lib/gitlab/github_import/comment_formatter.rb
+++ /dev/null
@@ -1,69 +0,0 @@
-module Gitlab
-  module GithubImport
-    class CommentFormatter < BaseFormatter
-      attr_writer :author_id
-
-      def attributes
-        {
-          project: project,
-          note: note,
-          commit_id: raw_data.commit_id,
-          line_code: line_code,
-          author_id: author_id,
-          type: type,
-          created_at: raw_data.created_at,
-          updated_at: raw_data.updated_at
-        }
-      end
-
-      private
-
-      def author
-        @author ||= UserFormatter.new(client, raw_data.user)
-      end
-
-      def author_id
-        author.gitlab_id || project.creator_id
-      end
-
-      def body
-        raw_data.body || ""
-      end
-
-      def line_code
-        return unless on_diff?
-
-        parsed_lines = Gitlab::Diff::Parser.new.parse(diff_hunk.lines)
-        generate_line_code(parsed_lines.to_a.last)
-      end
-
-      def generate_line_code(line)
-        Gitlab::Git.diff_line_code(file_path, line.new_pos, line.old_pos)
-      end
-
-      def on_diff?
-        diff_hunk.present?
-      end
-
-      def diff_hunk
-        raw_data.diff_hunk
-      end
-
-      def file_path
-        raw_data.path
-      end
-
-      def note
-        if author.gitlab_id
-          body
-        else
-          formatter.author_line(author.login) + body
-        end
-      end
-
-      def type
-        'LegacyDiffNote' if on_diff?
-      end
-    end
-  end
-end
diff --git a/lib/gitlab/github_import/importer.rb b/lib/gitlab/github_import/importer.rb
deleted file mode 100644
index b8c07460ebb..00000000000
--- a/lib/gitlab/github_import/importer.rb
+++ /dev/null
@@ -1,329 +0,0 @@
-module Gitlab
-  module GithubImport
-    class Importer
-      include Gitlab::ShellAdapter
-
-      attr_reader :errors, :project, :repo, :repo_url
-
-      def initialize(project)
-        @project  = project
-        @repo     = project.import_source
-        @repo_url = project.import_url
-        @errors   = []
-        @labels   = {}
-      end
-
-      def client
-        return @client if defined?(@client)
-        unless credentials
-          raise Projects::ImportService::Error,
-                "Unable to find project import data credentials for project ID: #{@project.id}"
-        end
-
-        opts = {}
-        # Gitea plan to be GitHub compliant
-        if project.gitea_import?
-          uri = URI.parse(project.import_url)
-          host = "#{uri.scheme}://#{uri.host}:#{uri.port}#{uri.path}".sub(%r{/?[\w-]+/[\w-]+\.git\z}, '')
-          opts = {
-            host: host,
-            api_version: 'v1'
-          }
-        end
-
-        @client = Client.new(credentials[:user], opts)
-      end
-
-      def execute
-        # The ordering of importing is important here due to the way GitHub structures their data
-        # 1. Labels are required by other items while not having a dependency on anything else
-        # so need to be first
-        # 2. Pull requests must come before issues. Every pull request is also an issue but not
-        # all issues are pull requests. Only the issue entity has labels defined in GitHub. GitLab
-        # doesn't structure data like this so we need to make sure that we've created the MRs
-        # before we attempt to add the labels defined in the GitHub issue for the related, already
-        # imported, pull request
-        import_labels
-        import_milestones
-        import_pull_requests
-        import_issues
-        import_comments(:issues)
-        import_comments(:pull_requests)
-        import_wiki
-
-        # Gitea doesn't have a Release API yet
-        # See https://github.com/go-gitea/gitea/issues/330
-        unless project.gitea_import?
-          import_releases
-        end
-
-        handle_errors
-
-        true
-      end
-
-      private
-
-      def credentials
-        return @credentials if defined?(@credentials)
-
-        @credentials = project.import_data ? project.import_data.credentials : nil
-      end
-
-      def handle_errors
-        return unless errors.any?
-
-        project.update_column(:import_error, {
-          message: 'The remote data could not be fully imported.',
-          errors: errors
-        }.to_json)
-      end
-
-      def import_labels
-        fetch_resources(:labels, repo, per_page: 100) do |labels|
-          labels.each do |raw|
-            begin
-              gh_label = LabelFormatter.new(project, raw)
-              gh_label.create!
-            rescue => e
-              errors << { type: :label, url: Gitlab::UrlSanitizer.sanitize(gh_label.url), errors: e.message }
-            end
-          end
-        end
-
-        cache_labels!
-      end
-
-      def import_milestones
-        fetch_resources(:milestones, repo, state: :all, per_page: 100) do |milestones|
-          milestones.each do |raw|
-            begin
-              gh_milestone = MilestoneFormatter.new(project, raw)
-              gh_milestone.create!
-            rescue => e
-              errors << { type: :milestone, url: Gitlab::UrlSanitizer.sanitize(gh_milestone.url), errors: e.message }
-            end
-          end
-        end
-      end
-
-      def import_issues
-        fetch_resources(:issues, repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |issues|
-          issues.each do |raw|
-            gh_issue = IssueFormatter.new(project, raw, client)
-
-            begin
-              issuable =
-                if gh_issue.pull_request?
-                  MergeRequest.find_by(target_project_id: project.id, iid: gh_issue.number)
-                else
-                  gh_issue.create!
-                end
-
-              apply_labels(issuable, raw)
-            rescue => e
-              errors << { type: :issue, url: Gitlab::UrlSanitizer.sanitize(gh_issue.url), errors: e.message }
-            end
-          end
-        end
-      end
-
-      def import_pull_requests
-        fetch_resources(:pull_requests, repo, state: :all, sort: :created, direction: :asc, per_page: 100) do |pull_requests|
-          pull_requests.each do |raw|
-            gh_pull_request = PullRequestFormatter.new(project, raw, client)
-
-            next unless gh_pull_request.valid?
-
-            begin
-              restore_source_branch(gh_pull_request) unless gh_pull_request.source_branch_exists?
-              restore_target_branch(gh_pull_request) unless gh_pull_request.target_branch_exists?
-
-              merge_request = gh_pull_request.create!
-
-              # Gitea doesn't return PR in the Issue API endpoint, so labels must be assigned at this stage
-              if project.gitea_import?
-                apply_labels(merge_request, raw)
-              end
-            rescue => e
-              errors << { type: :pull_request, url: Gitlab::UrlSanitizer.sanitize(gh_pull_request.url), errors: e.message }
-            ensure
-              clean_up_restored_branches(gh_pull_request)
-            end
-          end
-        end
-
-        project.repository.after_remove_branch
-      end
-
-      def restore_source_branch(pull_request)
-        project.repository.create_branch(pull_request.source_branch_name, pull_request.source_branch_sha)
-      end
-
-      def restore_target_branch(pull_request)
-        project.repository.create_branch(pull_request.target_branch_name, pull_request.target_branch_sha)
-      end
-
-      def remove_branch(name)
-        project.repository.delete_branch(name)
-      rescue Gitlab::Git::Repository::DeleteBranchFailed
-        errors << { type: :remove_branch, name: name }
-      end
-
-      def clean_up_restored_branches(pull_request)
-        return if pull_request.opened?
-
-        remove_branch(pull_request.source_branch_name) unless pull_request.source_branch_exists?
-        remove_branch(pull_request.target_branch_name) unless pull_request.target_branch_exists?
-      end
-
-      def apply_labels(issuable, raw)
-        return unless raw.labels.count > 0
-
-        label_ids = raw.labels
-          .map { |attrs| @labels[attrs.name] }
-          .compact
-
-        issuable.update_attribute(:label_ids, label_ids)
-      end
-
-      def import_comments(issuable_type)
-        resource_type = "#{issuable_type}_comments".to_sym
-
-        # Two notes here:
-        # 1. We don't have a distinctive attribute for comments (unlike issues iid), so we fetch the last inserted note,
-        # compare it against every comment in the current imported page until we find match, and that's where start importing
-        # 2. GH returns comments for _both_ issues and PRs through issues_comments API, while pull_requests_comments returns
-        # only comments on diffs, so select last note not based on noteable_type but on line_code
-        line_code_is = issuable_type == :pull_requests ? 'NOT NULL' : 'NULL'
-        last_note    = project.notes.where("line_code IS #{line_code_is}").last
-
-        fetch_resources(resource_type, repo, per_page: 100) do |comments|
-          if last_note
-            discard_inserted_comments(comments, last_note)
-            last_note = nil
-          end
-
-          create_comments(comments)
-        end
-      end
-
-      def create_comments(comments)
-        ActiveRecord::Base.no_touching do
-          comments.each do |raw|
-            begin
-              comment = CommentFormatter.new(project, raw, client)
-
-              # GH does not return info about comment's parent, so we guess it by checking its URL!
-              *_, parent, iid = URI(raw.html_url).path.split('/')
-
-              issuable = if parent == 'issues'
-                           Issue.find_by(project_id: project.id, iid: iid)
-                         else
-                           MergeRequest.find_by(target_project_id: project.id, iid: iid)
-                         end
-
-              next unless issuable
-
-              issuable.notes.create!(comment.attributes)
-            rescue => e
-              errors << { type: :comment, url: Gitlab::UrlSanitizer.sanitize(raw.url), errors: e.message }
-            end
-          end
-        end
-      end
-
-      def discard_inserted_comments(comments, last_note)
-        last_note_attrs = nil
-
-        cut_off_index = comments.find_index do |raw|
-          comment           = CommentFormatter.new(project, raw)
-          comment_attrs     = comment.attributes
-          last_note_attrs ||= last_note.slice(*comment_attrs.keys)
-
-          comment_attrs.with_indifferent_access == last_note_attrs
-        end
-
-        # No matching resource in the collection, which means we got halted right on the end of the last page, so all good
-        return unless cut_off_index
-
-        # Otherwise, remove the resources we've already inserted
-        comments.shift(cut_off_index + 1)
-      end
-
-      def import_wiki
-        unless project.wiki.repository_exists?
-          wiki = WikiFormatter.new(project)
-          gitlab_shell.import_repository(project.repository_storage_path, wiki.disk_path, wiki.import_url)
-        end
-      rescue Gitlab::Shell::Error => e
-        # GitHub error message when the wiki repo has not been created,
-        # this means that repo has wiki enabled, but have no pages. So,
-        # we can skip the import.
-        if e.message !~ /repository not exported/
-          errors << { type: :wiki, errors: e.message }
-        end
-      end
-
-      def import_releases
-        fetch_resources(:releases, repo, per_page: 100) do |releases|
-          releases.each do |raw|
-            begin
-              gh_release = ReleaseFormatter.new(project, raw)
-              gh_release.create! if gh_release.valid?
-            rescue => e
-              errors << { type: :release, url: Gitlab::UrlSanitizer.sanitize(gh_release.url), errors: e.message }
-            end
-          end
-        end
-      end
-
-      def cache_labels!
-        project.labels.select(:id, :title).find_each do |label|
-          @labels[label.title] = label.id
-        end
-      end
-
-      def fetch_resources(resource_type, *opts)
-        return if imported?(resource_type)
-
-        opts.last[:page] = current_page(resource_type)
-
-        client.public_send(resource_type, *opts) do |resources| # rubocop:disable GitlabSecurity/PublicSend
-          yield resources
-          increment_page(resource_type)
-        end
-
-        imported!(resource_type)
-      end
-
-      def imported?(resource_type)
-        Rails.cache.read("#{cache_key_prefix}:#{resource_type}:imported")
-      end
-
-      def imported!(resource_type)
-        Rails.cache.write("#{cache_key_prefix}:#{resource_type}:imported", true, ex: 1.day)
-      end
-
-      def increment_page(resource_type)
-        key = "#{cache_key_prefix}:#{resource_type}:current-page"
-
-        # Rails.cache.increment calls INCRBY directly on the value stored under the key, which is
-        # a serialized ActiveSupport::Cache::Entry, so it will return an error by Redis, hence this ugly work-around
-        page = Rails.cache.read(key)
-        page += 1
-        Rails.cache.write(key, page)
-
-        page
-      end
-
-      def current_page(resource_type)
-        Rails.cache.fetch("#{cache_key_prefix}:#{resource_type}:current-page", ex: 1.day) { 1 }
-      end
-
-      def cache_key_prefix
-        @cache_key_prefix ||= "github-import:#{project.id}"
-      end
-    end
-  end
-end
diff --git a/lib/gitlab/github_import/importer/diff_note_importer.rb b/lib/gitlab/github_import/importer/diff_note_importer.rb
new file mode 100644
index 00000000000..8274f37d358
--- /dev/null
+++ b/lib/gitlab/github_import/importer/diff_note_importer.rb
@@ -0,0 +1,69 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    module Importer
+      # Imports a single diff note as a `LegacyDiffNote` row that belongs to a
+      # merge request.
+      class DiffNoteImporter
+        attr_reader :note, :project, :client, :user_finder
+
+        # note - An instance of `Gitlab::GithubImport::Representation::DiffNote`.
+        # project - An instance of `Project`.
+        # client - An instance of `Gitlab::GithubImport::Client`.
+        def initialize(note, project, client)
+          @note = note
+          @project = project
+          @client = client
+          @user_finder = UserFinder.new(project, client)
+        end
+
+        # Inserts the diff note into the database.
+        #
+        # Nothing is inserted when no merge request ID could be found for the
+        # note.
+        def execute
+          merge_request_id = find_merge_request_id
+
+          return unless merge_request_id
+
+          author_id, author_found = user_finder.author_id_for(note)
+          body = MarkdownText.format(note.note, note.author, author_found)
+
+          row = {
+            noteable_type: 'MergeRequest',
+            noteable_id: merge_request_id,
+            project_id: project.id,
+            author_id: author_id,
+            note: body,
+            system: false,
+            commit_id: note.commit_id,
+            line_code: note.line_code,
+            type: 'LegacyDiffNote',
+            created_at: note.created_at,
+            updated_at: note.updated_at,
+            st_diff: note.diff_hash.to_yaml
+          }
+
+          # A single import may insert tens of thousands of diff notes. Going
+          # through the Note/LegacyDiffNote model would also run additional
+          # queries for validations and callbacks, putting a lot of pressure
+          # on the database.
+          #
+          # Using bulk_insert with a single row lets us insert the data
+          # efficiently without resorting to hacks for disabling callbacks.
+          Gitlab::Database.bulk_insert(LegacyDiffNote.table_name, [row])
+        rescue ActiveRecord::InvalidForeignKey
+          # It's possible the project or the merge request has been deleted
+          # since scheduling this job. In this case we skip creating the note.
+          nil
+        end
+
+        # Returns the ID of the merge request this note belongs to.
+        def find_merge_request_id
+          GithubImport::IssuableFinder.new(project, note).database_id
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/importer/diff_notes_importer.rb b/lib/gitlab/github_import/importer/diff_notes_importer.rb
new file mode 100644
index 00000000000..966f12c5c2f
--- /dev/null
+++ b/lib/gitlab/github_import/importer/diff_notes_importer.rb
@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    module Importer
+      # Describes how diff notes are imported. The methods below are
+      # presumably consumed by `ParallelScheduling`, which is defined elsewhere.
+      class DiffNotesImporter
+        include ParallelScheduling
+
+        # Returns the class used for importing a single diff note.
+        def importer_class
+          DiffNoteImporter
+        end
+
+        # Returns the class used for representing a single diff note.
+        def representation_class
+          Representation::DiffNote
+        end
+
+        # Returns the worker class associated with importing a diff note.
+        def sidekiq_worker_class
+          ImportDiffNoteWorker
+        end
+
+        # Returns the name of the collection of objects to import.
+        def collection_method
+          :pull_requests_comments
+        end
+
+        # Returns the value identifying the given note in the cache of already
+        # imported objects.
+        def id_for_already_imported_cache(note)
+          note.id
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/importer/issue_and_label_links_importer.rb b/lib/gitlab/github_import/importer/issue_and_label_links_importer.rb
new file mode 100644
index 00000000000..bad064b76c8
--- /dev/null
+++ b/lib/gitlab/github_import/importer/issue_and_label_links_importer.rb
@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    module Importer
+      # Imports an issue (when it is not a pull request) followed by its
+      # label links.
+      class IssueAndLabelLinksImporter
+        attr_reader :issue, :project, :client
+
+        # issue - An instance of `Gitlab::GithubImport::Representation::Issue`.
+        # project - An instance of `Project`
+        # client - An instance of `Gitlab::GithubImport::Client`
+        def initialize(issue, project, client)
+          @issue = issue
+          @project = project
+          @client = client
+        end
+
+        # The issue is imported first, after which the label links are
+        # imported.
+        def execute
+          IssueImporter.import_if_issue(issue, project, client)
+
+          label_links_importer = LabelLinksImporter.new(issue, project, client)
+          label_links_importer.execute
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/importer/issue_importer.rb b/lib/gitlab/github_import/importer/issue_importer.rb
new file mode 100644
index 00000000000..31fefebf787
--- /dev/null
+++ b/lib/gitlab/github_import/importer/issue_importer.rb
@@ -0,0 +1,91 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    module Importer
+      # Imports a single issue together with its assignees.
+      class IssueImporter
+        attr_reader :project, :issue, :client, :user_finder, :milestone_finder,
+                    :issuable_finder
+
+        # Imports the given issue, unless it is actually a pull request.
+        def self.import_if_issue(issue, project, client)
+          return if issue.pull_request?
+
+          new(issue, project, client).execute
+        end
+
+        # issue - An instance of `Gitlab::GithubImport::Representation::Issue`.
+        # project - An instance of `Project`
+        # client - An instance of `Gitlab::GithubImport::Client`
+        def initialize(issue, project, client)
+          @issue = issue
+          @project = project
+          @client = client
+          @user_finder = UserFinder.new(project, client)
+          @milestone_finder = MilestoneFinder.new(project)
+          @issuable_finder = GithubImport::IssuableFinder.new(project, issue)
+        end
+
+        # Creates the issue and its assignees in a single transaction, then
+        # caches the ID of the created issue.
+        def execute
+          Issue.transaction do
+            issue_id = create_issue
+
+            next unless issue_id
+
+            create_assignees(issue_id)
+            issuable_finder.cache_database_id(issue_id)
+          end
+        end
+
+        # Creates a new GitLab issue for the current GitHub issue.
+        #
+        # Returns the ID of the created issue as an Integer, or `nil` if the
+        # issue couldn't be created.
+        def create_issue
+          author_id, author_found = user_finder.author_id_for(issue)
+
+          description = MarkdownText
+            .format(issue.description, issue.author, author_found)
+
+          row = {
+            iid: issue.iid,
+            title: issue.truncated_title,
+            author_id: author_id,
+            project_id: project.id,
+            description: description,
+            milestone_id: milestone_finder.id_for(issue),
+            state: issue.state,
+            created_at: issue.created_at,
+            updated_at: issue.updated_at
+          }
+
+          GithubImport.insert_and_return_id(row, project.issues)
+        rescue ActiveRecord::InvalidForeignKey
+          # It's possible the project has been deleted since scheduling this
+          # job. In this case we'll just skip creating the issue.
+          nil
+        end
+
+        # Stores all issue assignees in the database.
+        #
+        # issue_id - The ID of the created issue.
+        def create_assignees(issue_id)
+          rows = []
+
+          issue.assignees.each do |assignee|
+            user_id = user_finder.user_id_for(assignee)
+
+            next unless user_id
+
+            rows << { issue_id: issue_id, user_id: user_id }
+          end
+
+          Gitlab::Database.bulk_insert(IssueAssignee.table_name, rows)
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/importer/issues_importer.rb b/lib/gitlab/github_import/importer/issues_importer.rb
new file mode 100644
index 00000000000..ac6d0666b3a
--- /dev/null
+++ b/lib/gitlab/github_import/importer/issues_importer.rb
@@ -0,0 +1,45 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    module Importer
+      # Describes how issues are imported. The methods below are presumably
+      # consumed by `ParallelScheduling`, which is defined elsewhere.
+      class IssuesImporter
+        include ParallelScheduling
+
+        # Returns the class used for representing a single issue.
+        def representation_class
+          Representation::Issue
+        end
+
+        # Returns the class used for importing a single issue.
+        def importer_class
+          IssueAndLabelLinksImporter
+        end
+
+        # Returns the worker class associated with importing an issue.
+        def sidekiq_worker_class
+          ImportIssueWorker
+        end
+
+        # Returns the name of the collection of objects to import.
+        def collection_method
+          :issues
+        end
+
+        # Returns the options to use for the collection: issues in any state,
+        # sorted by creation date in ascending order.
+        def collection_options
+          { state: 'all', sort: 'created', direction: 'asc' }
+        end
+
+        # Returns the value identifying the given issue in the cache of
+        # already imported objects.
+        def id_for_already_imported_cache(issue)
+          issue.number
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/importer/label_links_importer.rb b/lib/gitlab/github_import/importer/label_links_importer.rb
new file mode 100644
index 00000000000..2001b7e3482
--- /dev/null
+++ b/lib/gitlab/github_import/importer/label_links_importer.rb
@@ -0,0 +1,67 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    module Importer
+      # Links the labels of an issue or pull request to the corresponding
+      # database row.
+      class LabelLinksImporter
+        attr_reader :issue, :project, :client, :label_finder
+
+        # issue - An instance of `Gitlab::GithubImport::Representation::Issue`
+        # project - An instance of `Project`
+        # client - An instance of `Gitlab::GithubImport::Client`
+        def initialize(issue, project, client)
+          @issue = issue
+          @project = project
+          @client = client
+          @label_finder = LabelFinder.new(project)
+        end
+
+        def execute
+          create_labels
+        end
+
+        # Inserts a label link for every label of the issue for which a label
+        # ID could be found.
+        #
+        # Nothing is inserted when the ID of the issue or merge request is
+        # unknown.
+        def create_labels
+          target_id = find_target_id
+
+          # The ID is missing when nothing has been cached for the issue or
+          # merge request, e.g. because it could not be imported. Without it
+          # we would insert label links that don't point to anything.
+          return unless target_id
+
+          time = Time.zone.now
+          rows = []
+
+          issue.label_names.each do |label_name|
+            # Although unlikely it's technically possible for an issue to be
+            # given a label that was created and assigned after we imported all
+            # the project's labels.
+            next unless (label_id = label_finder.id_for(label_name))
+
+            rows << {
+              label_id: label_id,
+              target_id: target_id,
+              target_type: issue.issuable_type,
+              created_at: time,
+              updated_at: time
+            }
+          end
+
+          Gitlab::Database.bulk_insert(LabelLink.table_name, rows)
+        end
+
+        # Returns the ID of the issue or merge request to link the labels to,
+        # or `nil` if no ID could be found.
+        def find_target_id
+          GithubImport::IssuableFinder.new(project, issue).database_id
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/importer/labels_importer.rb b/lib/gitlab/github_import/importer/labels_importer.rb
new file mode 100644
index 00000000000..a73033d35ba
--- /dev/null
+++ b/lib/gitlab/github_import/importer/labels_importer.rb
@@ -0,0 +1,62 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    module Importer
+      # Imports the labels of a project in bulk, then builds the cache of label
+      # IDs.
+      class LabelsImporter
+        include BulkImporting
+
+        attr_reader :project, :client, :existing_labels
+
+        # project - An instance of `Project`.
+        # client - An instance of `Gitlab::GithubImport::Client`.
+        def initialize(project, client)
+          @project = project
+          @client = client
+          @existing_labels = project.labels.pluck(:title).to_set
+        end
+
+        def execute
+          bulk_insert(Label, build_labels)
+          build_labels_cache
+        end
+
+        # Returns the labels offered by the client for the project.
+        def each_label
+          client.labels(project.import_source)
+        end
+
+        # Builds the labels to pass to `bulk_insert`.
+        def build_labels
+          build_database_rows(each_label)
+        end
+
+        # Returns true if the project had a label with the same name when
+        # this importer was initialised.
+        def already_imported?(label)
+          existing_labels.include?(label.name)
+        end
+
+        # Returns the attributes of the database row for the given label.
+        def build(label)
+          now = Time.zone.now
+
+          {
+            title: label.name,
+            color: '#' + label.color,
+            project_id: project.id,
+            type: 'ProjectLabel',
+            created_at: now,
+            updated_at: now
+          }
+        end
+
+        def build_labels_cache
+          LabelFinder.new(project).build_cache
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/importer/milestones_importer.rb b/lib/gitlab/github_import/importer/milestones_importer.rb
new file mode 100644
index 00000000000..c53480e828a
--- /dev/null
+++ b/lib/gitlab/github_import/importer/milestones_importer.rb
@@ -0,0 +1,68 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    module Importer
+      # Imports the milestones of a project in bulk, then builds the
+      # milestones cache.
+      class MilestonesImporter
+        include BulkImporting
+
+        attr_reader :project, :client, :existing_milestones
+
+        # project - An instance of `Project`
+        # client - An instance of `Gitlab::GithubImport::Client`
+        def initialize(project, client)
+          @project = project
+          @client = client
+          @existing_milestones = project.milestones.pluck(:iid).to_set
+        end
+
+        def execute
+          bulk_insert(Milestone, build_milestones)
+          build_milestones_cache
+        end
+
+        # Returns the milestones (in any state) offered by the client.
+        def each_milestone
+          client.milestones(project.import_source, state: 'all')
+        end
+
+        # Builds the milestones to pass to `bulk_insert`.
+        def build_milestones
+          build_database_rows(each_milestone)
+        end
+
+        # Returns true if the project had a milestone with the same IID when
+        # this importer was initialised.
+        def already_imported?(milestone)
+          existing_milestones.include?(milestone.number)
+        end
+
+        # Returns the attributes of the database row for the given milestone.
+        def build(milestone)
+          {
+            iid: milestone.number,
+            title: milestone.title,
+            description: milestone.description,
+            project_id: project.id,
+            state: state_for(milestone),
+            created_at: milestone.created_at,
+            updated_at: milestone.updated_at
+          }
+        end
+
+        # Returns `:active` for open milestones, `:closed` for all others.
+        def state_for(milestone)
+          return :active if milestone.state == 'open'
+
+          :closed
+        end
+
+        def build_milestones_cache
+          MilestoneFinder.new(project).build_cache
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/importer/note_importer.rb b/lib/gitlab/github_import/importer/note_importer.rb
new file mode 100644
index 00000000000..c890f2df360
--- /dev/null
+++ b/lib/gitlab/github_import/importer/note_importer.rb
@@ -0,0 +1,60 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    module Importer
+      # Imports a single note that belongs to an issue or a merge request.
+      class NoteImporter
+        attr_reader :note, :project, :client, :user_finder
+
+        # note - An instance of `Gitlab::GithubImport::Representation::Note`.
+        # project - An instance of `Project`.
+        # client - An instance of `Gitlab::GithubImport::Client`.
+        def initialize(note, project, client)
+          @note = note
+          @project = project
+          @client = client
+          @user_finder = UserFinder.new(project, client)
+        end
+
+        # Inserts the note into the database.
+        #
+        # Nothing is inserted when no ID could be found for the issue or merge
+        # request the note belongs to.
+        def execute
+          noteable_id = find_noteable_id
+
+          return unless noteable_id
+
+          author_id, author_found = user_finder.author_id_for(note)
+          body = MarkdownText.format(note.note, note.author, author_found)
+
+          row = {
+            noteable_type: note.noteable_type,
+            noteable_id: noteable_id,
+            project_id: project.id,
+            author_id: author_id,
+            note: body,
+            system: false,
+            created_at: note.created_at,
+            updated_at: note.updated_at
+          }
+
+          # bulk_insert is used so that validations and callbacks are bypassed.
+          # Running these would result in a lot of unnecessary SQL queries
+          # being executed when importing large projects.
+          Gitlab::Database.bulk_insert(Note.table_name, [row])
+        rescue ActiveRecord::InvalidForeignKey
+          # It's possible the project or the noteable has been deleted since
+          # scheduling this job. In this case we skip creating the note.
+          nil
+        end
+
+        # Returns the ID of the issue or merge request to create the note for.
+        def find_noteable_id
+          GithubImport::IssuableFinder.new(project, note).database_id
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/importer/notes_importer.rb b/lib/gitlab/github_import/importer/notes_importer.rb
new file mode 100644
index 00000000000..5aec760ea5f
--- /dev/null
+++ b/lib/gitlab/github_import/importer/notes_importer.rb
@@ -0,0 +1,39 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    module Importer
+      # Describes how notes are imported. The methods below are presumably
+      # consumed by `ParallelScheduling`, which is defined elsewhere.
+      class NotesImporter
+        include ParallelScheduling
+
+        # Returns the class used for representing a single note.
+        def representation_class
+          Representation::Note
+        end
+
+        # Returns the class used for importing a single note.
+        def importer_class
+          NoteImporter
+        end
+
+        # Returns the worker class associated with importing a note.
+        def sidekiq_worker_class
+          ImportNoteWorker
+        end
+
+        # Returns the name of the collection of objects to import.
+        def collection_method
+          :issues_comments
+        end
+
+        # Returns the value identifying the given note in the cache of already
+        # imported objects.
+        def id_for_already_imported_cache(note)
+          note.id
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/importer/pull_request_importer.rb b/lib/gitlab/github_import/importer/pull_request_importer.rb
new file mode 100644
index 00000000000..49d859f9624
--- /dev/null
+++ b/lib/gitlab/github_import/importer/pull_request_importer.rb
@@ -0,0 +1,93 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    module Importer
+      # Imports a single pull request as a merge request.
+      class PullRequestImporter
+        attr_reader :pull_request, :project, :client, :user_finder,
+                    :milestone_finder, :issuable_finder
+
+        # pull_request - An instance of
+        #                `Gitlab::GithubImport::Representation::PullRequest`.
+        # project - An instance of `Project`
+        # client - An instance of `Gitlab::GithubImport::Client`
+        def initialize(pull_request, project, client)
+          @pull_request = pull_request
+          @project = project
+          @client = client
+          @user_finder = UserFinder.new(project, client)
+          @milestone_finder = MilestoneFinder.new(project)
+          @issuable_finder =
+            GithubImport::IssuableFinder.new(project, pull_request)
+        end
+
+        # Creates the merge request and, if that worked, caches its ID.
+        def execute
+          mr_id = create_merge_request
+
+          issuable_finder.cache_database_id(mr_id) if mr_id
+        end
+
+        # Creates the merge request and returns its ID.
+        #
+        # This method will return `nil` if the merge request could not be
+        # created.
+        def create_merge_request
+          author_id, author_found = user_finder.author_id_for(pull_request)
+
+          description = MarkdownText
+            .format(pull_request.description, pull_request.author, author_found)
+
+          # Everything below runs in a transaction so that an error can't leave
+          # incomplete data behind, which could lead to duplicate key errors
+          # when jobs are retried.
+          MergeRequest.transaction do
+            row = {
+              iid: pull_request.iid,
+              title: pull_request.truncated_title,
+              description: description,
+              source_project_id: project.id,
+              target_project_id: project.id,
+              source_branch: pull_request.formatted_source_branch,
+              target_branch: pull_request.target_branch,
+              state: pull_request.state,
+              milestone_id: milestone_finder.id_for(pull_request),
+              author_id: author_id,
+              assignee_id: user_finder.assignee_id_for(pull_request),
+              created_at: pull_request.created_at,
+              updated_at: pull_request.updated_at
+            }
+
+            # Creating merge requests through the model runs a lot of hooks,
+            # for many different reasons. Many of them (e.g. the ones rendering
+            # Markdown) are completely unnecessary here and may even lead to
+            # transaction timeouts.
+            #
+            # To keep the impact of importing pull requests minimal we bypass
+            # all hooks by inserting the row and then retrieving it, after
+            # which we only perform the work that is strictly necessary.
+            mr_id = GithubImport
+              .insert_and_return_id(row, project.merge_requests)
+
+            mr = project.merge_requests.find(mr_id)
+
+            # These fields are set so we can create the correct merge request
+            # diffs.
+            mr.source_branch_sha = pull_request.source_branch_sha
+            mr.target_branch_sha = pull_request.target_branch_sha
+
+            mr.keep_around_commit
+            mr.merge_request_diffs.create
+
+            mr.id
+          end
+        rescue ActiveRecord::InvalidForeignKey
+          # It's possible the project has been deleted since scheduling this
+          # job. In this case we'll just skip creating the merge request.
+          nil
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/importer/pull_requests_importer.rb b/lib/gitlab/github_import/importer/pull_requests_importer.rb
new file mode 100644
index 00000000000..5437e32e9f1
--- /dev/null
+++ b/lib/gitlab/github_import/importer/pull_requests_importer.rb
@@ -0,0 +1,91 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    module Importer
+      # Describes how pull requests are imported, re-fetching the repository
+      # when a pull request refers to commits that are not present yet.
+      class PullRequestsImporter
+        include ParallelScheduling
+
+        def representation_class
+          Representation::PullRequest
+        end
+
+        def importer_class
+          PullRequestImporter
+        end
+
+        def sidekiq_worker_class
+          ImportPullRequestWorker
+        end
+
+        def collection_method
+          :pull_requests
+        end
+
+        def collection_options
+          { state: 'all', sort: 'created', direction: 'asc' }
+        end
+
+        def id_for_already_imported_cache(pr)
+          pr.number
+        end
+
+        # Yields every pull request to import, updating the repository first
+        # when this is necessary for the pull request.
+        def each_object_to_import
+          super do |pr|
+            update_repository if update_repository?(pr)
+            yield pr
+          end
+        end
+
+        # Returns true if the repository has to be fetched again for the given
+        # pull request.
+        def update_repository?(pr)
+          last_update = project.last_repository_updated_at || project.created_at
+
+          return false if pr.updated_at < last_update
+
+          # PRs may be updated without there actually being new commits, thus we
+          # only re-fetch if the head or the base commit is missing.
+          !commit_exists?(pr.head.sha) || !commit_exists?(pr.base.sha)
+        end
+
+        # Fetches the `github` remote again and records that this happened.
+        def update_repository
+          # The column is deliberately set _before_ fetching the repository.
+          # Updating it after the fetch could make us miss changes pushed during
+          # the fetch, or between the fetch and updating the timestamp.
+          project.update_column(:last_repository_updated_at, Time.zone.now)
+
+          project.repository.fetch_remote('github', forced: false)
+
+          path = project.path_with_namespace
+          message = "GitHub importer finished updating repository for #{path}"
+
+          Rails.logger.info(message)
+
+          repository_updates_counter.increment(project: path)
+        end
+
+        # Returns true if looking up the given SHA in the repository does not
+        # raise a `Rugged::Error`.
+        def commit_exists?(sha)
+          project.repository.lookup(sha)
+          true
+        rescue Rugged::Error
+          false
+        end
+
+        def repository_updates_counter
+          @repository_updates_counter ||= Gitlab::Metrics.counter(
+            :github_importer_repository_updates,
+            'The number of times repositories have to be updated again'
+          )
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/importer/releases_importer.rb b/lib/gitlab/github_import/importer/releases_importer.rb
new file mode 100644
index 00000000000..100f459fdcc
--- /dev/null
+++ b/lib/gitlab/github_import/importer/releases_importer.rb
@@ -0,0 +1,61 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    module Importer
+      # Imports the releases of a project in bulk.
+      class ReleasesImporter
+        include BulkImporting
+
+        attr_reader :project, :client, :existing_tags
+
+        # project - An instance of `Project`
+        # client - An instance of `Gitlab::GithubImport::Client`
+        def initialize(project, client)
+          @project = project
+          @client = client
+          @existing_tags = project.releases.pluck(:tag).to_set
+        end
+
+        def execute
+          bulk_insert(Release, build_releases)
+        end
+
+        # Returns the releases offered by the client for the project.
+        def each_release
+          client.releases(project.import_source)
+        end
+
+        # Builds the releases to pass to `bulk_insert`.
+        def build_releases
+          build_database_rows(each_release)
+        end
+
+        # Returns true if the project had a release for the same tag when this
+        # importer was initialised.
+        def already_imported?(release)
+          existing_tags.include?(release.tag_name)
+        end
+
+        # Returns the attributes of the database row for the given release.
+        def build(release)
+          {
+            tag: release.tag_name,
+            description: description_for(release),
+            created_at: release.created_at,
+            updated_at: release.updated_at,
+            project_id: project.id
+          }
+        end
+
+        # Returns the body of the release, or a generic description if the body
+        # is blank.
+        def description_for(release)
+          return release.body if release.body.present?
+
+          "Release for tag #{release.tag_name}"
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/importer/repository_importer.rb b/lib/gitlab/github_import/importer/repository_importer.rb
new file mode 100644
index 00000000000..0b67fc8db73
--- /dev/null
+++ b/lib/gitlab/github_import/importer/repository_importer.rb
@@ -0,0 +1,114 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    module Importer
+      # Imports the Git repository of a project and, when applicable, its wiki.
+      class RepositoryImporter
+        include Gitlab::ShellAdapter
+
+        attr_reader :project, :client
+
+        # project - An instance of `Project`
+        # client - An instance of `Gitlab::GithubImport::Client`
+        def initialize(project, client)
+          @project = project
+          @client = client
+        end
+
+        # Returns true if we should import the wiki for the project.
+        def import_wiki?
+          client.repository(project.import_source)&.has_wiki &&
+            !project.wiki_repository_exists?
+        end
+
+        # Imports the repository data.
+        #
+        # This method will return true if the data was imported successfully or
+        # the repository had already been imported before.
+        def execute
+          imported =
+            # It's possible a repository has already been imported when running
+            # this code, e.g. because we had to retry this job after
+            # `import_wiki?` raised a rate limit error. In this case we'll skip
+            # re-importing the main repository.
+            if project.repository.empty_repo?
+              import_repository
+            else
+              true
+            end
+
+          # There is nothing left to do when importing the repository failed.
+          # Returning here also prevents `import_wiki?` from calling the client
+          # for a result we would not use.
+          return false unless imported
+
+          update_clone_time
+
+          return true unless import_wiki?
+
+          import_wiki_repository
+        end
+
+        # Fetches the repository from the `github` remote.
+        #
+        # Returns true on success. On failure the import is marked as failed
+        # and false is returned.
+        def import_repository
+          project.ensure_repository
+
+          configure_repository_remote
+
+          project.repository.fetch_remote('github', forced: true)
+
+          true
+        rescue Gitlab::Git::Repository::NoRepository, Gitlab::Shell::Error => e
+          fail_import("Failed to import the repository: #{e.message}")
+        end
+
+        # Adds and configures the `github` remote, unless it already exists.
+        def configure_repository_remote
+          return if project.repository.remote_exists?('github')
+
+          project.repository.add_remote('github', project.import_url)
+          project.repository.set_import_remote_as_mirror('github')
+
+          project.repository.add_remote_fetch_config(
+            'github',
+            '+refs/pull/*/head:refs/merge-requests/*/head'
+          )
+        end
+
+        # Imports the wiki repository.
+        #
+        # Returns true unless the import failed with an error other than the
+        # wiki repository not being exported.
+        def import_wiki_repository
+          wiki_path = "#{project.disk_path}.wiki"
+          wiki_url = project.import_url.sub(/\.git\z/, '.wiki.git')
+          storage_path = project.repository_storage_path
+
+          gitlab_shell.import_repository(storage_path, wiki_path, wiki_url)
+
+          true
+        rescue Gitlab::Shell::Error => e
+          # An error mentioning "repository not exported" is not treated as a
+          # failure of the import.
+          return true if e.message =~ /repository not exported/
+
+          fail_import("Failed to import the wiki: #{e.message}")
+        end
+
+        # Stores the current time as the time the repository was last updated.
+        def update_clone_time
+          project.update_column(:last_repository_updated_at, Time.zone.now)
+        end
+
+        # Marks the import as failed using the given message and returns false.
+        def fail_import(message)
+          project.mark_import_as_failed(message)
+          false
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/issuable_finder.rb b/lib/gitlab/github_import/issuable_finder.rb
new file mode 100644
index 00000000000..211915f1d87
--- /dev/null
+++ b/lib/gitlab/github_import/issuable_finder.rb
@@ -0,0 +1,74 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    # IssuableFinder can be used for caching and retrieving database IDs for
+    # issuable objects such as issues and pull requests. By caching these IDs we
+    # remove the need for running a lot of database queries when importing
+    # GitHub projects.
+    class IssuableFinder
+      attr_reader :project, :object
+
+      # The base cache key to use for storing/retrieving issuable IDs.
+      CACHE_KEY = 'github-import/issuable-finder/%{project}/%{type}/%{iid}'.freeze
+
+      # project - An instance of `Project`.
+      # object - The object to look up or set a database ID for.
+      def initialize(project, object)
+        @project = project
+        @object = object
+      end
+
+      # Returns the database ID for the object.
+      #
+      # This method will return `nil` if no ID could be found.
+      def database_id
+        cached = Caching.read(cache_key)
+
+        return unless cached.present?
+
+        cached.to_i
+      end
+
+      # Associates the given database ID with the current object.
+      #
+      # database_id - The ID of the corresponding database row.
+      def cache_database_id(database_id)
+        Caching.write(cache_key, database_id)
+      end
+
+      private
+
+      def cache_key
+        format(
+          CACHE_KEY,
+          project: project.id,
+          type: cache_key_type,
+          iid: cache_key_iid
+        )
+      end
+
+      # Returns the identifier to use for cache keys.
+      #
+      # For issues and pull requests this will be "Issue" or "MergeRequest"
+      # respectively. For diff notes this will return "MergeRequest", for
+      # regular notes it will either return "Issue" or "MergeRequest" depending
+      # on what type of object the note belongs to.
+      def cache_key_type
+        return object.issuable_type if object.respond_to?(:issuable_type)
+        return object.noteable_type if object.respond_to?(:noteable_type)
+
+        raise TypeError, "Instances of #{object.class} are not supported"
+      end
+
+      # Returns the IID to use for cache keys: the `noteable_id` of the object
+      # if it has one, its `iid` otherwise.
+      def cache_key_iid
+        return object.noteable_id if object.respond_to?(:noteable_id)
+        return object.iid if object.respond_to?(:iid)
+
+        raise TypeError, "Instances of #{object.class} are not supported"
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/issuable_formatter.rb b/lib/gitlab/github_import/issuable_formatter.rb
deleted file mode 100644
index 27b171d6ddb..00000000000
--- a/lib/gitlab/github_import/issuable_formatter.rb
+++ /dev/null
@@ -1,66 +0,0 @@
-module Gitlab
-  module GithubImport
-    class IssuableFormatter < BaseFormatter
-      attr_writer :assignee_id, :author_id
-
-      def project_association
-        raise NotImplementedError
-      end
-
-      delegate :number, to: :raw_data
-
-      def find_condition
-        { iid: number }
-      end
-
-      private
-
-      def state
-        raw_data.state == 'closed' ? 'closed' : 'opened'
-      end
-
-      def assigned?
-        raw_data.assignee.present?
-      end
-
-      def author
-        @author ||= UserFormatter.new(client, raw_data.user)
-      end
-
-      def author_id
-        @author_id ||= author.gitlab_id || project.creator_id
-      end
-
-      def assignee
-        if assigned?
-          @assignee ||= UserFormatter.new(client, raw_data.assignee)
-        end
-      end
-
-      def assignee_id
-        return @assignee_id if defined?(@assignee_id)
-
-        @assignee_id = assignee.try(:gitlab_id)
-      end
-
-      def body
-        raw_data.body || ""
-      end
-
-      def description
-        if author.gitlab_id
-          body
-        else
-          formatter.author_line(author.login) + body
-        end
-      end
-
-      def milestone
-        if raw_data.milestone.present?
-          milestone = MilestoneFormatter.new(project, raw_data.milestone)
-          project.milestones.find_by(milestone.find_condition)
-        end
-      end
-    end
-  end
-end
diff --git a/lib/gitlab/github_import/issue_formatter.rb b/lib/gitlab/github_import/issue_formatter.rb
deleted file mode 100644
index 977cd0423ba..00000000000
--- a/lib/gitlab/github_import/issue_formatter.rb
+++ /dev/null
@@ -1,32 +0,0 @@
-module Gitlab
-  module GithubImport
-    class IssueFormatter < IssuableFormatter
-      def attributes
-        {
-          iid: number,
-          project: project,
-          milestone: milestone,
-          title: raw_data.title,
-          description: description,
-          state: state,
-          author_id: author_id,
-          assignee_ids: Array(assignee_id),
-          created_at: raw_data.created_at,
-          updated_at: raw_data.updated_at
-        }
-      end
-
-      def has_comments?
-        raw_data.comments > 0
-      end
-
-      def project_association
-        :issues
-      end
-
-      def pull_request?
-        raw_data.pull_request.present?
-      end
-    end
-  end
-end
diff --git a/lib/gitlab/github_import/label_finder.rb b/lib/gitlab/github_import/label_finder.rb
new file mode 100644
index 00000000000..9be071141db
--- /dev/null
+++ b/lib/gitlab/github_import/label_finder.rb
@@ -0,0 +1,37 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    class LabelFinder
+      attr_reader :project
+
+      # The base cache key to use for storing/retrieving label IDs.
+      CACHE_KEY = 'github-import/label-finder/%{project}/%{name}'.freeze
+
+      # project - An instance of `Project`.
+      def initialize(project)
+        @project = project
+      end
+
+      # Returns the label ID for the given name.
+      def id_for(name)
+        Caching.read_integer(cache_key_for(name))
+      end
+
+      def build_cache
+        mapping = @project
+          .labels
+          .pluck(:id, :name)
+          .each_with_object({}) do |(id, name), hash|
+            hash[cache_key_for(name)] = id
+          end
+
+        Caching.write_multiple(mapping)
+      end
+
+      def cache_key_for(name)
+        CACHE_KEY % { project: project.id, name: name }
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/label_formatter.rb b/lib/gitlab/github_import/label_formatter.rb
deleted file mode 100644
index 211ccdc51bb..00000000000
--- a/lib/gitlab/github_import/label_formatter.rb
+++ /dev/null
@@ -1,37 +0,0 @@
-module Gitlab
-  module GithubImport
-    class LabelFormatter < BaseFormatter
-      def attributes
-        {
-          project: project,
-          title: title,
-          color: color
-        }
-      end
-
-      def project_association
-        :labels
-      end
-
-      def create!
-        params  = attributes.except(:project)
-        service = ::Labels::FindOrCreateService.new(nil, project, params)
-        label   = service.execute(skip_authorization: true)
-
-        raise ActiveRecord::RecordInvalid.new(label) unless label.persisted?
-
-        label
-      end
-
-      private
-
-      def color
-        "##{raw_data.color}"
-      end
-
-      def title
-        raw_data.name
-      end
-    end
-  end
-end
diff --git a/lib/gitlab/github_import/markdown_text.rb b/lib/gitlab/github_import/markdown_text.rb
new file mode 100644
index 00000000000..b25c4f7becf
--- /dev/null
+++ b/lib/gitlab/github_import/markdown_text.rb
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    class MarkdownText
+      attr_reader :text, :author, :exists
+
+      def self.format(*args)
+        new(*args).to_s
+      end
+
+      # text - The Markdown text as a String.
+      # author - An instance of `Gitlab::GithubImport::Representation::User`
+      # exists - Boolean that indicates the user exists in the GitLab database.
+      def initialize(text, author, exists = false)
+        @text = text
+        @author = author
+        @exists = exists
+      end
+
+      def to_s
+        if exists
+          text
+        else
+          "*Created by: #{author.login}*\n\n#{text}"
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/milestone_finder.rb b/lib/gitlab/github_import/milestone_finder.rb
new file mode 100644
index 00000000000..208d15dc144
--- /dev/null
+++ b/lib/gitlab/github_import/milestone_finder.rb
@@ -0,0 +1,40 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    class MilestoneFinder
+      attr_reader :project
+
+      # The base cache key to use for storing/retrieving milestone IDs.
+      CACHE_KEY = 'github-import/milestone-finder/%{project}/%{iid}'.freeze
+
+      # project - An instance of `Project`
+      def initialize(project)
+        @project = project
+      end
+
+      # issuable - An instance of `Gitlab::GithubImport::Representation::Issue`
+      #            or `Gitlab::GithubImport::Representation::PullRequest`.
+      def id_for(issuable)
+        return unless issuable.milestone_number
+
+        Caching.read_integer(cache_key_for(issuable.milestone_number))
+      end
+
+      def build_cache
+        mapping = @project
+          .milestones
+          .pluck(:id, :iid)
+          .each_with_object({}) do |(id, iid), hash|
+            hash[cache_key_for(iid)] = id
+          end
+
+        Caching.write_multiple(mapping)
+      end
+
+      def cache_key_for(iid)
+        CACHE_KEY % { project: project.id, iid: iid }
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/milestone_formatter.rb b/lib/gitlab/github_import/milestone_formatter.rb
deleted file mode 100644
index dd782eff059..00000000000
--- a/lib/gitlab/github_import/milestone_formatter.rb
+++ /dev/null
@@ -1,40 +0,0 @@
-module Gitlab
-  module GithubImport
-    class MilestoneFormatter < BaseFormatter
-      def attributes
-        {
-          iid: number,
-          project: project,
-          title: raw_data.title,
-          description: raw_data.description,
-          due_date: raw_data.due_on,
-          state: state,
-          created_at: raw_data.created_at,
-          updated_at: raw_data.updated_at
-        }
-      end
-
-      def project_association
-        :milestones
-      end
-
-      def find_condition
-        { iid: number }
-      end
-
-      def number
-        if project.gitea_import?
-          raw_data.id
-        else
-          raw_data.number
-        end
-      end
-
-      private
-
-      def state
-        raw_data.state == 'closed' ? 'closed' : 'active'
-      end
-    end
-  end
-end
diff --git a/lib/gitlab/github_import/page_counter.rb b/lib/gitlab/github_import/page_counter.rb
new file mode 100644
index 00000000000..c3db2d0b469
--- /dev/null
+++ b/lib/gitlab/github_import/page_counter.rb
@@ -0,0 +1,31 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    # PageCounter can be used to keep track of the last imported page of a
+    # collection, allowing workers to resume where they left off in the event of
+    # an error.
+    class PageCounter
+      attr_reader :cache_key
+
+      # The base cache key to use for storing the last page number.
+      CACHE_KEY = 'github-importer/page-counter/%{project}/%{collection}'.freeze
+
+      def initialize(project, collection)
+        @cache_key = CACHE_KEY % { project: project.id, collection: collection }
+      end
+
+      # Sets the page number to the given value.
+      #
+      # Returns true if the page number was overwritten, false otherwise.
+      def set(page)
+        Caching.write_if_greater(cache_key, page)
+      end
+
+      # Returns the current value from the cache.
+      def current
+        Caching.read_integer(cache_key) || 1
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/parallel_importer.rb b/lib/gitlab/github_import/parallel_importer.rb
new file mode 100644
index 00000000000..81739834b41
--- /dev/null
+++ b/lib/gitlab/github_import/parallel_importer.rb
@@ -0,0 +1,44 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    # The ParallelImporter schedules the importing of a GitHub project using
+    # Sidekiq.
+    class ParallelImporter
+      attr_reader :project
+
+      def self.async?
+        true
+      end
+
+      def initialize(project)
+        @project = project
+      end
+
+      def execute
+        jid = generate_jid
+
+        # The original import JID is the JID of the RepositoryImportWorker job,
+        # which will be removed once that job completes. Reusing that JID could
+        # result in StuckImportJobsWorker marking the job as stuck before we get
+        # to running Stage::ImportRepositoryWorker.
+        #
+        # We work around this by setting the JID to a custom generated one, then
+        # refreshing it in the various stages whenever necessary.
+        Gitlab::SidekiqStatus
+          .set(jid, StuckImportJobsWorker::IMPORT_JOBS_EXPIRATION)
+
+        project.update_column(:import_jid, jid)
+
+        Stage::ImportRepositoryWorker
+          .perform_async(project.id)
+
+        true
+      end
+
+      def generate_jid
+        "github-importer/#{project.id}"
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/parallel_scheduling.rb b/lib/gitlab/github_import/parallel_scheduling.rb
new file mode 100644
index 00000000000..d4d1357f5a3
--- /dev/null
+++ b/lib/gitlab/github_import/parallel_scheduling.rb
@@ -0,0 +1,162 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    module ParallelScheduling
+      attr_reader :project, :client, :page_counter, :already_imported_cache_key
+
+      # The base cache key to use for tracking already imported objects.
+      ALREADY_IMPORTED_CACHE_KEY =
+        'github-importer/already-imported/%{project}/%{collection}'.freeze
+
+      # project - An instance of `Project`.
+      # client - An instance of `Gitlab::GithubImport::Client`.
+      # parallel - When set to true the objects will be imported in parallel.
+      def initialize(project, client, parallel: true)
+        @project = project
+        @client = client
+        @parallel = parallel
+        @page_counter = PageCounter.new(project, collection_method)
+        @already_imported_cache_key = ALREADY_IMPORTED_CACHE_KEY %
+          { project: project.id, collection: collection_method }
+      end
+
+      def parallel?
+        @parallel
+      end
+
+      def execute
+        retval =
+          if parallel?
+            parallel_import
+          else
+            sequential_import
+          end
+
+        # Once we have completed all work we can remove our "already exists"
+        # cache so we don't put too much pressure on Redis.
+        #
+        # We don't immediately remove it since it's technically possible for
+        # other instances of this job to still run, instead we set the
+        # expiration time to a lower value. This prevents the other jobs from
+        # still scheduling duplicates. Since all work has already been
+        # completed those jobs will just cycle through any remaining pages while
+        # not scheduling anything.
+        Caching.expire(already_imported_cache_key, 15.minutes.to_i)
+
+        retval
+      end
+
+      # Imports all the objects in sequence in the current thread.
+      def sequential_import
+        each_object_to_import do |object|
+          repr = representation_class.from_api_response(object)
+
+          importer_class.new(repr, project, client).execute
+        end
+      end
+
+      # Imports all objects in parallel by scheduling a Sidekiq job for every
+      # individual object.
+      def parallel_import
+        waiter = JobWaiter.new
+
+        each_object_to_import do |object|
+          repr = representation_class.from_api_response(object)
+
+          sidekiq_worker_class
+            .perform_async(project.id, repr.to_hash, waiter.key)
+
+          waiter.jobs_remaining += 1
+        end
+
+        waiter
+      end
+
+      # The method that will be called for traversing through all the objects to
+      # import, yielding them to the supplied block.
+      def each_object_to_import
+        repo = project.import_source
+
+        # We inject the page number here to make sure that all importers always
+        # start where they left off. Simply starting over wouldn't work for
+        # repositories with a lot of data (e.g. tens of thousands of comments).
+        options = collection_options.merge(page: page_counter.current)
+
+        client.each_page(collection_method, repo, options) do |page|
+          # Technically it's possible that the same work is performed multiple
+          # times, as Sidekiq doesn't guarantee there will ever only be one
+          # instance of a job. In such a scenario it's possible for one job to
+          # have a lower page number (e.g. 5) compared to another (e.g. 10). In
+          # this case we skip over all the objects until we have caught up,
+          # reducing the number of duplicate jobs scheduled by the provided
+          # block.
+          next unless page_counter.set(page.number)
+
+          page.objects.each do |object|
+            next if already_imported?(object)
+
+            yield object
+
+            # We mark the object as imported immediately so we don't end up
+            # scheduling it multiple times.
+            mark_as_imported(object)
+          end
+        end
+      end
+
+      # Returns true if the given object has already been imported, false
+      # otherwise.
+      #
+      # object - The object to check.
+      def already_imported?(object)
+        id = id_for_already_imported_cache(object)
+
+        Caching.set_includes?(already_imported_cache_key, id)
+      end
+
+      # Marks the given object as "already imported".
+      def mark_as_imported(object)
+        id = id_for_already_imported_cache(object)
+
+        Caching.set_add(already_imported_cache_key, id)
+      end
+
+      # Returns the ID to use for the cache used for checking if an object has
+      # already been imported or not.
+      #
+      # object - The object we may want to import.
+      def id_for_already_imported_cache(object)
+        raise NotImplementedError
+      end
+
+      # The class used for converting API responses to Hashes when performing
+      # the import.
+      def representation_class
+        raise NotImplementedError
+      end
+
+      # The class to use for importing objects when importing them sequentially.
+      def importer_class
+        raise NotImplementedError
+      end
+
+      # The Sidekiq worker class used for scheduling the importing of objects in
+      # parallel.
+      def sidekiq_worker_class
+        raise NotImplementedError
+      end
+
+      # The name of the method to call to retrieve the data to import.
+      def collection_method
+        raise NotImplementedError
+      end
+
+      # Any options to be passed to the method used for retrieving the data to
+      # import.
+      def collection_options
+        {}
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/project_creator.rb b/lib/gitlab/github_import/project_creator.rb
deleted file mode 100644
index a55adc9b1c8..00000000000
--- a/lib/gitlab/github_import/project_creator.rb
+++ /dev/null
@@ -1,52 +0,0 @@
-module Gitlab
-  module GithubImport
-    class ProjectCreator
-      include Gitlab::CurrentSettings
-
-      attr_reader :repo, :name, :namespace, :current_user, :session_data, :type
-
-      def initialize(repo, name, namespace, current_user, session_data, type: 'github')
-        @repo = repo
-        @name = name
-        @namespace = namespace
-        @current_user = current_user
-        @session_data = session_data
-        @type = type
-      end
-
-      def execute
-        ::Projects::CreateService.new(
-          current_user,
-          name: name,
-          path: name,
-          description: repo.description,
-          namespace_id: namespace.id,
-          visibility_level: visibility_level,
-          import_type: type,
-          import_source: repo.full_name,
-          import_url: import_url,
-          skip_wiki: skip_wiki
-        ).execute
-      end
-
-      private
-
-      def import_url
-        repo.clone_url.sub('://', "://#{session_data[:github_access_token]}@")
-      end
-
-      def visibility_level
-        repo.private ? Gitlab::VisibilityLevel::PRIVATE : current_application_settings.default_project_visibility
-      end
-
-      #
-      # If the GitHub project repository has wiki, we should not create the
-      # default wiki. Otherwise the GitHub importer will fail because the wiki
-      # repository already exist.
-      #
-      def skip_wiki
-        repo.has_wiki?
-      end
-    end
-  end
-end
diff --git a/lib/gitlab/github_import/pull_request_formatter.rb b/lib/gitlab/github_import/pull_request_formatter.rb
deleted file mode 100644
index 150afa31432..00000000000
--- a/lib/gitlab/github_import/pull_request_formatter.rb
+++ /dev/null
@@ -1,90 +0,0 @@
-module Gitlab
-  module GithubImport
-    class PullRequestFormatter < IssuableFormatter
-      delegate :user, :project, :ref, :repo, :sha, to: :source_branch, prefix: true
-      delegate :user, :exists?, :project, :ref, :repo, :sha, :short_sha, to: :target_branch, prefix: true
-
-      def attributes
-        {
-          iid: number,
-          title: raw_data.title,
-          description: description,
-          source_project: source_branch_project,
-          source_branch: source_branch_name,
-          source_branch_sha: source_branch_sha,
-          target_project: target_branch_project,
-          target_branch: target_branch_name,
-          target_branch_sha: target_branch_sha,
-          state: state,
-          milestone: milestone,
-          author_id: author_id,
-          assignee_id: assignee_id,
-          created_at: raw_data.created_at,
-          updated_at: raw_data.updated_at,
-          imported: true
-        }
-      end
-
-      def project_association
-        :merge_requests
-      end
-
-      def valid?
-        source_branch.valid? && target_branch.valid?
-      end
-
-      def source_branch
-        @source_branch ||= BranchFormatter.new(project, raw_data.head)
-      end
-
-      def source_branch_name
-        @source_branch_name ||=
-          if cross_project? || !source_branch_exists?
-            source_branch_name_prefixed
-          else
-            source_branch_ref
-          end
-      end
-
-      def source_branch_name_prefixed
-        "gh-#{target_branch_short_sha}/#{number}/#{source_branch_user}/#{source_branch_ref}"
-      end
-
-      def source_branch_exists?
-        !cross_project? && source_branch.exists?
-      end
-
-      def target_branch
-        @target_branch ||= BranchFormatter.new(project, raw_data.base)
-      end
-
-      def target_branch_name
-        @target_branch_name ||= target_branch_exists? ? target_branch_ref : target_branch_name_prefixed
-      end
-
-      def target_branch_name_prefixed
-        "gl-#{target_branch_short_sha}/#{number}/#{target_branch_user}/#{target_branch_ref}"
-      end
-
-      def cross_project?
-        return true if source_branch_repo.nil?
-
-        source_branch_repo.id != target_branch_repo.id
-      end
-
-      def opened?
-        state == 'opened'
-      end
-
-      private
-
-      def state
-        if raw_data.state == 'closed' && raw_data.merged_at.present?
-          'merged'
-        else
-          super
-        end
-      end
-    end
-  end
-end
diff --git a/lib/gitlab/github_import/rate_limit_error.rb b/lib/gitlab/github_import/rate_limit_error.rb
new file mode 100644
index 00000000000..cc2de909c29
--- /dev/null
+++ b/lib/gitlab/github_import/rate_limit_error.rb
@@ -0,0 +1,9 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    # Error that will be raised when we're about to reach (or have reached) the
+    # GitHub API's rate limit.
+    RateLimitError = Class.new(StandardError)
+  end
+end
diff --git a/lib/gitlab/github_import/release_formatter.rb b/lib/gitlab/github_import/release_formatter.rb
deleted file mode 100644
index 1ad702a6058..00000000000
--- a/lib/gitlab/github_import/release_formatter.rb
+++ /dev/null
@@ -1,27 +0,0 @@
-module Gitlab
-  module GithubImport
-    class ReleaseFormatter < BaseFormatter
-      def attributes
-        {
-          project: project,
-          tag: raw_data.tag_name,
-          description: raw_data.body,
-          created_at: raw_data.created_at,
-          updated_at: raw_data.created_at
-        }
-      end
-
-      def project_association
-        :releases
-      end
-
-      def find_condition
-        { tag: raw_data.tag_name }
-      end
-
-      def valid?
-        !raw_data.draft
-      end
-    end
-  end
-end
diff --git a/lib/gitlab/github_import/representation.rb b/lib/gitlab/github_import/representation.rb
new file mode 100644
index 00000000000..639477ef2a2
--- /dev/null
+++ b/lib/gitlab/github_import/representation.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    module Representation
+      TIMESTAMP_KEYS = %i[created_at updated_at merged_at].freeze
+
+      # Converts a Hash with String based keys to one that can be used by the
+      # various Representation classes.
+      #
+      # Example:
+      #
+      #     Representation.symbolize_hash('number' => 10) # => { number: 10 }
+      def self.symbolize_hash(raw_hash = nil)
+        hash = raw_hash.deep_symbolize_keys
+
+        TIMESTAMP_KEYS.each do |key|
+          hash[key] = Time.parse(hash[key]) if hash[key].is_a?(String)
+        end
+
+        hash
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/representation/diff_note.rb b/lib/gitlab/github_import/representation/diff_note.rb
new file mode 100644
index 00000000000..bb7439a0641
--- /dev/null
+++ b/lib/gitlab/github_import/representation/diff_note.rb
@@ -0,0 +1,87 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    module Representation
+      class DiffNote
+        include ToHash
+        include ExposeAttribute
+
+        attr_reader :attributes
+
+        expose_attribute :noteable_type, :noteable_id, :commit_id, :file_path,
+                         :diff_hunk, :author, :note, :created_at, :updated_at,
+                         :github_id
+
+        NOTEABLE_ID_REGEX = /\/pull\/(?<iid>\d+)/i
+
+        # Builds a diff note from a GitHub API response.
+        #
+        # note - An instance of `Sawyer::Resource` containing the note details.
+        def self.from_api_response(note)
+          matches = note.html_url.match(NOTEABLE_ID_REGEX)
+
+          unless matches
+            raise(
+              ArgumentError,
+              "The note URL #{note.html_url.inspect} is not supported"
+            )
+          end
+
+          user = Representation::User.from_api_response(note.user) if note.user
+          hash = {
+            noteable_type: 'MergeRequest',
+            noteable_id: matches[:iid].to_i,
+            file_path: note.path,
+            commit_id: note.commit_id,
+            diff_hunk: note.diff_hunk,
+            author: user,
+            note: note.body,
+            created_at: note.created_at,
+            updated_at: note.updated_at,
+            github_id: note.id
+          }
+
+          new(hash)
+        end
+
+        # Builds a new note using a Hash that was built from a JSON payload.
+        def self.from_json_hash(raw_hash)
+          hash = Representation.symbolize_hash(raw_hash)
+          hash[:author] &&= Representation::User.from_json_hash(hash[:author])
+
+          new(hash)
+        end
+
+        # attributes - A Hash containing the raw note details. The keys of this
+        #              Hash must be Symbols.
+        def initialize(attributes)
+          @attributes = attributes
+        end
+
+        def line_code
+          diff_line = Gitlab::Diff::Parser.new.parse(diff_hunk.lines).to_a.last
+
+          Gitlab::Git
+            .diff_line_code(file_path, diff_line.new_pos, diff_line.old_pos)
+        end
+
+        # Returns a Hash that can be used to populate `notes.st_diff`, removing
+        # the need for requesting Git data for every diff note.
+        def diff_hash
+          {
+            diff: diff_hunk,
+            new_path: file_path,
+            old_path: file_path,
+
+            # These fields are not displayed for LegacyDiffNote notes, so it
+            # doesn't really matter what we set them to.
+            a_mode: '100644',
+            b_mode: '100644',
+            new_file: false
+          }
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/representation/expose_attribute.rb b/lib/gitlab/github_import/representation/expose_attribute.rb
new file mode 100644
index 00000000000..c3405759631
--- /dev/null
+++ b/lib/gitlab/github_import/representation/expose_attribute.rb
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    module Representation
+      module ExposeAttribute
+        extend ActiveSupport::Concern
+
+        module ClassMethods
+          # Defines getter methods for the given attribute names.
+          #
+          # Example:
+          #
+          #     expose_attribute :iid, :title
+          def expose_attribute(*names)
+            names.each do |name|
+              name = name.to_sym
+
+              define_method(name) { attributes[name] }
+            end
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/representation/issue.rb b/lib/gitlab/github_import/representation/issue.rb
new file mode 100644
index 00000000000..f3071b3e2b3
--- /dev/null
+++ b/lib/gitlab/github_import/representation/issue.rb
@@ -0,0 +1,80 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    module Representation
+      class Issue
+        include ToHash
+        include ExposeAttribute
+
+        attr_reader :attributes
+
+        expose_attribute :iid, :title, :description, :milestone_number,
+                         :created_at, :updated_at, :state, :assignees,
+                         :label_names, :author
+
+        # Builds an issue from a GitHub API response.
+        #
+        # issue - An instance of `Sawyer::Resource` containing the issue
+        #         details.
+        def self.from_api_response(issue)
+          user =
+            if issue.user
+              Representation::User.from_api_response(issue.user)
+            end
+
+          hash = {
+            iid: issue.number,
+            title: issue.title,
+            description: issue.body,
+            milestone_number: issue.milestone&.number,
+            state: issue.state == 'open' ? :opened : :closed,
+            assignees: issue.assignees.map do |u|
+              Representation::User.from_api_response(u)
+            end,
+            label_names: issue.labels.map(&:name),
+            author: user,
+            created_at: issue.created_at,
+            updated_at: issue.updated_at,
+            pull_request: issue.pull_request ? true : false
+          }
+
+          new(hash)
+        end
+
+        # Builds a new issue using a Hash that was built from a JSON payload.
+        def self.from_json_hash(raw_hash)
+          hash = Representation.symbolize_hash(raw_hash)
+
+          hash[:state] = hash[:state].to_sym
+          hash[:assignees].map! { |u| Representation::User.from_json_hash(u) }
+          hash[:author] &&= Representation::User.from_json_hash(hash[:author])
+
+          new(hash)
+        end
+
+        # attributes - A hash containing the raw issue details. The keys of this
+        #              Hash (and any nested hashes) must be symbols.
+        def initialize(attributes)
+          @attributes = attributes
+        end
+
+        def truncated_title
+          title.truncate(255)
+        end
+
+        def labels?
+          label_names && label_names.any?
+        end
+
+        def pull_request?
+          attributes[:pull_request]
+        end
+
+        def issuable_type
+          pull_request? ? 'MergeRequest' : 'Issue'
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/representation/note.rb b/lib/gitlab/github_import/representation/note.rb
new file mode 100644
index 00000000000..a68bc4c002f
--- /dev/null
+++ b/lib/gitlab/github_import/representation/note.rb
@@ -0,0 +1,70 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    module Representation
+      class Note
+        include ToHash
+        include ExposeAttribute
+
+        attr_reader :attributes
+
+        expose_attribute :noteable_id, :noteable_type, :author, :note,
+                         :created_at, :updated_at, :github_id
+
+        NOTEABLE_TYPE_REGEX = /\/(?<type>(pull|issues))\/(?<iid>\d+)/i
+
+        # Builds a note from a GitHub API response.
+        #
+        # note - An instance of `Sawyer::Resource` containing the note details.
+        def self.from_api_response(note)
+          matches = note.html_url.match(NOTEABLE_TYPE_REGEX)
+
+          if !matches || !matches[:type]
+            raise(
+              ArgumentError,
+              "The note URL #{note.html_url.inspect} is not supported"
+            )
+          end
+
+          noteable_type =
+            if matches[:type] == 'pull'
+              'MergeRequest'
+            else
+              'Issue'
+            end
+
+          user = Representation::User.from_api_response(note.user) if note.user
+          hash = {
+            noteable_type: noteable_type,
+            noteable_id: matches[:iid].to_i,
+            author: user,
+            note: note.body,
+            created_at: note.created_at,
+            updated_at: note.updated_at,
+            github_id: note.id
+          }
+
+          new(hash)
+        end
+
+        # Builds a new note using a Hash that was built from a JSON payload.
+        def self.from_json_hash(raw_hash)
+          hash = Representation.symbolize_hash(raw_hash)
+
+          hash[:author] &&= Representation::User.from_json_hash(hash[:author])
+
+          new(hash)
+        end
+
+        # attributes - A Hash containing the raw note details. The keys of this
+        #              Hash must be Symbols.
+        def initialize(attributes)
+          @attributes = attributes
+        end
+
+        alias_method :issuable_type, :noteable_type
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/representation/pull_request.rb b/lib/gitlab/github_import/representation/pull_request.rb
new file mode 100644
index 00000000000..593b491a837
--- /dev/null
+++ b/lib/gitlab/github_import/representation/pull_request.rb
@@ -0,0 +1,122 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    module Representation
+      # Representation of a GitHub pull request, built either from a GitHub
+      # API response or from a Hash that was decoded from a JSON payload.
+      class PullRequest
+        include ToHash
+        include ExposeAttribute
+
+        attr_reader :attributes
+
+        expose_attribute :iid, :title, :description, :source_branch,
+                         :source_branch_sha, :target_branch, :target_branch_sha,
+                         :milestone_number, :author, :assignee, :created_at,
+                         :updated_at, :merged_at, :source_repository_id,
+                         :target_repository_id, :source_repository_owner
+
+        # Builds a PR from a GitHub API response.
+        #
+        # pr - An instance of `Sawyer::Resource` containing the PR details.
+        def self.from_api_response(pr)
+          assignee =
+            if pr.assignee
+              Representation::User.from_api_response(pr.assignee)
+            end
+
+          user = Representation::User.from_api_response(pr.user) if pr.user
+          hash = {
+            iid: pr.number,
+            title: pr.title,
+            description: pr.body,
+            source_branch: pr.head.ref,
+            target_branch: pr.base.ref,
+            source_branch_sha: pr.head.sha,
+            target_branch_sha: pr.base.sha,
+            source_repository_id: pr.head.repo&.id,
+            target_repository_id: pr.base.repo&.id,
+            source_repository_owner: pr.head.user&.login,
+            state: pr.state == 'open' ? :opened : :closed,
+            milestone_number: pr.milestone&.number,
+            author: user,
+            assignee: assignee,
+            created_at: pr.created_at,
+            updated_at: pr.updated_at,
+            merged_at: pr.merged_at
+          }
+
+          new(hash)
+        end
+
+        # Builds a new PR using a Hash that was built from a JSON payload.
+        def self.from_json_hash(raw_hash)
+          hash = Representation.symbolize_hash(raw_hash)
+
+          hash[:state] = hash[:state].to_sym
+          hash[:author] &&= Representation::User.from_json_hash(hash[:author])
+
+          # Assignees are optional so we only convert it from a Hash if one was
+          # set.
+          hash[:assignee] &&= Representation::User
+            .from_json_hash(hash[:assignee])
+
+          new(hash)
+        end
+
+        # attributes - A Hash containing the raw PR details. The keys of this
+        #              Hash (and any nested hashes) must be symbols.
+        def initialize(attributes)
+          @attributes = attributes
+        end
+
+        # Returns the title shortened by `String#truncate` with a limit of 255.
+        def truncated_title
+          title.truncate(255)
+        end
+
+        # Returns a formatted source branch.
+        #
+        # For cross-project pull requests the branch name will be in the format
+        # `owner-name:branch-name`.
+        def formatted_source_branch
+          if cross_project? && source_repository_owner
+            "#{source_repository_owner}:#{source_branch}"
+          elsif source_branch == target_branch
+            # Sometimes the source and target branch are the same, but GitLab
+            # doesn't support this. This can happen when both the user and
+            # source repository have been deleted, and the PR was submitted from
+            # the fork's master branch.
+            "#{source_branch}-#{iid}"
+          else
+            source_branch
+          end
+        end
+
+        # Returns `:merged` when a merge timestamp is set, otherwise the
+        # `:state` value stored in the attributes.
+        def state
+          if merged_at
+            :merged
+          else
+            attributes[:state]
+          end
+        end
+
+        # Returns true when the source repository ID is missing or differs
+        # from the target repository ID.
+        def cross_project?
+          return true unless source_repository_id
+
+          source_repository_id != target_repository_id
+        end
+
+        # Pull requests always use the 'MergeRequest' issuable type.
+        def issuable_type
+          'MergeRequest'
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/representation/to_hash.rb b/lib/gitlab/github_import/representation/to_hash.rb
new file mode 100644
index 00000000000..4a0f36ab8f0
--- /dev/null
+++ b/lib/gitlab/github_import/representation/to_hash.rb
@@ -0,0 +1,30 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    module Representation
+      # Mixin that serialises an object's `attributes` into a plain Hash.
+      module ToHash
+        # Returns a new Hash holding every attribute of this representation.
+        # Keys are copied unchanged from `attributes`.
+        def to_hash
+          attributes.each_with_object({}) do |(key, value), result|
+            result[key] = convert_value_for_to_hash(value)
+          end
+        end
+
+        # Converts a single attribute value: Arrays are converted element by
+        # element, objects responding to `to_hash` are converted using that
+        # method, and anything else is returned untouched.
+        def convert_value_for_to_hash(value)
+          case value
+          when Array
+            value.map { |element| convert_value_for_to_hash(element) }
+          else
+            value.respond_to?(:to_hash) ? value.to_hash : value
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/representation/user.rb b/lib/gitlab/github_import/representation/user.rb
new file mode 100644
index 00000000000..e00dcfca33d
--- /dev/null
+++ b/lib/gitlab/github_import/representation/user.rb
@@ -0,0 +1,41 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    module Representation
+      # Representation of a GitHub user, holding the user's ID and login.
+      class User
+        include ToHash
+        include ExposeAttribute
+
+        attr_reader :attributes
+
+        expose_attribute :id, :login
+
+        class << self
+          # Creates a user from a GitHub API response.
+          #
+          # user - A `Sawyer::Resource` instance holding the user details.
+          def from_api_response(user)
+            details = { id: user.id, login: user.login }
+
+            new(details)
+          end
+
+          # Creates a user from a Hash that was decoded from a JSON payload.
+          def from_json_hash(raw_hash)
+            details = Representation.symbolize_hash(raw_hash)
+
+            new(details)
+          end
+        end
+
+        # attributes - A Hash of user details. The keys of this Hash (and of
+        #              any nested hashes) must be symbols.
+        def initialize(attributes)
+          @attributes = attributes
+        end
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/sequential_importer.rb b/lib/gitlab/github_import/sequential_importer.rb
new file mode 100644
index 00000000000..4f7324536a0
--- /dev/null
+++ b/lib/gitlab/github_import/sequential_importer.rb
@@ -0,0 +1,53 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    # The SequentialImporter imports a GitHub project in a single thread,
+    # without using Sidekiq. This makes it useful for testing purposes as well
+    # as Rake tasks, but it should be avoided for anything else in favour of the
+    # parallel importer.
+    class SequentialImporter
+      attr_reader :project, :client
+
+      SEQUENTIAL_IMPORTERS = [
+        Importer::LabelsImporter,
+        Importer::MilestonesImporter,
+        Importer::ReleasesImporter
+      ].freeze
+
+      PARALLEL_IMPORTERS = [
+        Importer::PullRequestsImporter,
+        Importer::IssuesImporter,
+        Importer::DiffNotesImporter,
+        Importer::NotesImporter
+      ].freeze
+
+      # project - The project to import the data into.
+      # token - The token to use for the GitHub API.
+      def initialize(project, token: nil)
+        @project = project
+        @client =
+          GithubImport.new_client_for(project, token: token, parallel: false)
+      end
+
+      # Runs the repository importer first, followed by the importers in
+      # SEQUENTIAL_IMPORTERS and PARALLEL_IMPORTERS (the latter with
+      # `parallel: false`), then calls `after_import` on the repository.
+      #
+      # Returns true once all steps have run.
+      def execute
+        steps = [Importer::RepositoryImporter, *SEQUENTIAL_IMPORTERS]
+
+        steps.each { |importer| importer.new(project, client).execute }
+
+        PARALLEL_IMPORTERS.each do |importer|
+          importer.new(project, client, parallel: false).execute
+        end
+
+        project.repository.after_import
+
+        true
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/user_finder.rb b/lib/gitlab/github_import/user_finder.rb
new file mode 100644
index 00000000000..be1259662a7
--- /dev/null
+++ b/lib/gitlab/github_import/user_finder.rb
@@ -0,0 +1,177 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module GithubImport
+    # Class that can be used for finding a GitLab user ID based on a GitHub user
+    # ID or username.
+    #
+    # Any found user IDs are cached in Redis to reduce the number of SQL queries
+    # executed over time. Valid keys are refreshed upon access so frequently
+    # used keys stick around.
+    #
+    # Lookups are cached even if no ID was found to remove the need for querying
+    # the database when most queries are not going to return results anyway.
+    class UserFinder
+      attr_reader :project, :client
+
+      # The base cache key to use for caching user IDs for a given GitHub user
+      # ID.
+      ID_CACHE_KEY = 'github-import/user-finder/user-id/%s'.freeze
+
+      # The base cache key to use for caching user IDs for a given GitHub email
+      # address.
+      ID_FOR_EMAIL_CACHE_KEY =
+        'github-import/user-finder/id-for-email/%s'.freeze
+
+      # The base cache key to use for caching the Email addresses of GitHub
+      # usernames.
+      EMAIL_FOR_USERNAME_CACHE_KEY =
+        'github-import/user-finder/email-for-username/%s'.freeze
+
+      # project - An instance of `Project`
+      # client - An instance of `Gitlab::GithubImport::Client`
+      def initialize(project, client)
+        @project = project
+        @client = client
+      end
+
+      # Returns the GitLab user ID of an object's author.
+      #
+      # If the object has no author ID we'll use the ID of the GitLab ghost
+      # user.
+      #
+      # The return value is an Array with two values: the user ID to use, and
+      # a boolean that is false when no matching user was found and the ID of
+      # the project creator is returned instead.
+      def author_id_for(object)
+        id =
+          if object&.author
+            user_id_for(object.author)
+          else
+            GithubImport.ghost_user_id
+          end
+
+        if id
+          [id, true]
+        else
+          [project.creator_id, false]
+        end
+      end
+
+      # Returns the GitLab user ID of an issuable's assignee, or nil when the
+      # issuable has no assignee.
+      def assignee_id_for(issuable)
+        user_id_for(issuable.assignee) if issuable.assignee
+      end
+
+      # Returns the GitLab user ID for a GitHub user.
+      #
+      # user - An instance of `Gitlab::GithubImport::Representation::User`.
+      def user_id_for(user)
+        find(user.id, user.login)
+      end
+
+      # Returns the GitLab ID for the given GitHub ID or username.
+      #
+      # id - The ID of the GitHub user.
+      # username - The username of the GitHub user.
+      def find(id, username)
+        email = email_for_github_username(username)
+        cached, found_id = find_from_cache(id, email)
+
+        return found_id if found_id
+
+        # We only want to query the database if necessary. If previous lookups
+        # didn't yield a user ID we won't query the database again until the
+        # keys expire.
+        find_id_from_database(id, email) unless cached
+      end
+
+      # Finds a user ID from the cache for a given GitHub ID or Email.
+      #
+      # Returns the Array produced by `read_id_from_cache`, or `[false]` when
+      # the ID lookup yields nothing and no Email address is available.
+      def find_from_cache(id, email = nil)
+        id_exists, id_for_github_id = cached_id_for_github_id(id)
+
+        return [id_exists, id_for_github_id] if id_for_github_id
+
+        # Just in case no Email address could be retrieved (for whatever reason)
+        return [false] unless email
+
+        cached_id_for_github_email(email)
+      end
+
+      # Finds a GitLab user ID from the database for a given GitHub user ID or
+      # Email.
+      #
+      # The Email lookup is skipped when no Email address is available, so we
+      # don't run a query (and write a cache key) for a missing address.
+      def find_id_from_database(id, email)
+        id_for_github_id(id) || (id_for_github_email(email) if email)
+      end
+
+      # Returns the Email address for a GitHub username, read from the cache
+      # or, if not cached, requested from the client and then cached.
+      def email_for_github_username(username)
+        cache_key = EMAIL_FOR_USERNAME_CACHE_KEY % username
+        email = Caching.read(cache_key)
+
+        unless email
+          user = client.user(username)
+          email = Caching.write(cache_key, user.email) if user
+        end
+
+        email
+      end
+
+      def cached_id_for_github_id(id)
+        read_id_from_cache(ID_CACHE_KEY % id)
+      end
+
+      def cached_id_for_github_email(email)
+        read_id_from_cache(ID_FOR_EMAIL_CACHE_KEY % email)
+      end
+
+      # Queries and caches the GitLab user ID for a GitHub user ID, if one was
+      # found.
+      def id_for_github_id(id)
+        gitlab_id = query_id_for_github_id(id)
+
+        Caching.write(ID_CACHE_KEY % id, gitlab_id)
+      end
+
+      # Queries and caches the GitLab user ID for a GitHub email, if one was
+      # found.
+      def id_for_github_email(email)
+        gitlab_id = query_id_for_github_email(email)
+
+        Caching.write(ID_FOR_EMAIL_CACHE_KEY % email, gitlab_id)
+      end
+
+      def query_id_for_github_id(id)
+        User.for_github_id(id).pluck(:id).first
+      end
+
+      def query_id_for_github_email(email)
+        User.by_any_email(email).pluck(:id).first
+      end
+
+      # Reads an ID from the cache.
+      #
+      # The return value is an Array with two values:
+      #
+      # 1. A boolean indicating if the key was present or not.
+      # 2. The ID as an Integer, or nil in case no ID could be found.
+      def read_id_from_cache(key)
+        value = Caching.read(key)
+        exists = !value.nil?
+        number = value.to_i
+
+        # The cache key may be empty to indicate a previously looked up user for
+        # which we couldn't find an ID.
+        [exists, number.positive? ? number : nil]
+      end
+    end
+  end
+end
diff --git a/lib/gitlab/github_import/user_formatter.rb b/lib/gitlab/github_import/user_formatter.rb
deleted file mode 100644
index 04c2964da20..00000000000
--- a/lib/gitlab/github_import/user_formatter.rb
+++ /dev/null
@@ -1,45 +0,0 @@
-module Gitlab
-  module GithubImport
-    class UserFormatter
-      attr_reader :client, :raw
-
-      delegate :id, :login, to: :raw, allow_nil: true
-
-      def initialize(client, raw)
-        @client = client
-        @raw = raw
-      end
-
-      def gitlab_id
-        return @gitlab_id if defined?(@gitlab_id)
-
-        @gitlab_id = find_by_external_uid || find_by_email
-      end
-
-      private
-
-      def email
-        @email ||= client.user(raw.login).try(:email)
-      end
-
-      def find_by_email
-        return nil unless email
-
-        User.find_by_any_email(email)
-            .try(:id)
-      end
-
-      def find_by_external_uid
-        return nil unless id
-
-        identities = ::Identity.arel_table
-
-        User.select(:id)
-            .joins(:identities).where(identities[:provider].eq(:github)
-            .and(identities[:extern_uid].eq(id)))
-            .first
-            .try(:id)
-      end
-    end
-  end
-end
diff --git a/lib/gitlab/github_import/wiki_formatter.rb b/lib/gitlab/github_import/wiki_formatter.rb
deleted file mode 100644
index ca8d96f5650..00000000000
--- a/lib/gitlab/github_import/wiki_formatter.rb
+++ /dev/null
@@ -1,19 +0,0 @@
-module Gitlab
-  module GithubImport
-    class WikiFormatter
-      attr_reader :project
-
-      def initialize(project)
-        @project = project
-      end
-
-      def disk_path
-        project.wiki.disk_path
-      end
-
-      def import_url
-        project.import_url.sub(/\.git\z/, ".wiki.git")
-      end
-    end
-  end
-end