summaryrefslogtreecommitdiff
path: root/lib/gitlab/github_import/user_finder.rb
blob: f583ef39d13fd05e28a024b4a7c224abe5d9c079 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
# frozen_string_literal: true

module Gitlab
  module GithubImport
    # Class that can be used for finding a GitLab user ID based on a GitHub user
    # ID or username.
    #
    # Any found user IDs are cached in Redis to reduce the number of SQL queries
    # executed over time. Valid keys are refreshed upon access so frequently
    # used keys stick around.
    #
    # Lookups are cached even if no ID was found to remove the need for querying
    # the database when most queries are not going to return results anyway.
    class UserFinder
      attr_reader :project, :client

      # The base cache key to use for caching user IDs for a given GitHub user
      # ID.
      ID_CACHE_KEY = 'github-import/user-finder/user-id/%s'

      # The base cache key to use for caching user IDs for a given GitHub email
      # address.
      ID_FOR_EMAIL_CACHE_KEY =
        'github-import/user-finder/id-for-email/%s'

      # The base cache key to use for caching the Email addresses of GitHub
      # usernames.
      EMAIL_FOR_USERNAME_CACHE_KEY =
        'github-import/user-finder/email-for-username/%s'

      # project - An instance of `Project`
      # client - An instance of `Gitlab::GithubImport::Client`
      def initialize(project, client)
        @project = project
        @client = client
      end

      # Returns the GitLab user ID of an object's author.
      #
      # If the object has no author we'll use the ID of the GitLab ghost user.
      #
      # The return value is an Array with two values:
      #
      # 1. The user ID to use for the object.
      # 2. `true` if that ID came from the author lookup or the ghost user,
      #    `false` if no ID was found and the project creator's ID is used
      #    instead.
      def author_id_for(object)
        id = object&.author ? user_id_for(object.author) : GithubImport.ghost_user_id

        id ? [id, true] : [project.creator_id, false]
      end

      # Returns the GitLab user ID of an issuable's assignee, or nil when the
      # issuable has no assignee or no matching user was found.
      def assignee_id_for(issuable)
        user_id_for(issuable.assignee) if issuable.assignee
      end

      # Returns the GitLab user ID for a GitHub user, or nil when the user is
      # not present or no matching GitLab user was found.
      #
      # user - An instance of `Gitlab::GithubImport::Representation::User`.
      def user_id_for(user)
        find(user.id, user.login) if user.present?
      end

      # Returns the GitLab ID for the given GitHub ID or username.
      #
      # id - The ID of the GitHub user.
      # username - The username of the GitHub user.
      def find(id, username)
        email = email_for_github_username(username)
        cached, found_id = find_from_cache(id, email)

        return found_id if found_id

        # We only want to query the database if necessary. If previous lookups
        # didn't yield a user ID we won't query the database again until the
        # keys expire.
        find_id_from_database(id, email) unless cached
      end

      # Finds a user ID from the cache for a given GitHub ID or Email.
      #
      # The return value is an Array of which the first value indicates if a
      # cached lookup result exists, and the (optional) second value is the
      # cached GitLab user ID, if any.
      def find_from_cache(id, email = nil)
        id_exists, id_for_github_id = cached_id_for_github_id(id)

        return [id_exists, id_for_github_id] if id_for_github_id

        # Without an Email address the ID lookup is all we have. Reporting its
        # cache state (instead of always `false`) ensures a cached "no user
        # found" result doesn't trigger another database query on every call.
        return [id_exists] if email_missing?(email)

        cached_id_for_github_email(email)
      end

      # Finds a GitLab user ID from the database for a given GitHub user ID or
      # Email.
      #
      # The Email lookup is skipped when no Email address is available, so we
      # neither query users by a blank address nor cache a result under a key
      # without an address.
      #
      # NOTE(review): this relies on `Gitlab::Cache::Import::Caching.write`
      # returning the value that was written - confirm against that module.
      def find_id_from_database(id, email)
        gitlab_id = id_for_github_id(id)

        return gitlab_id if gitlab_id
        return if email_missing?(email)

        id_for_github_email(email)
      end

      # Returns the Email address of a GitHub username.
      #
      # The cached value is used when there is one, otherwise the user is
      # retrieved using the client and its Email address is cached. Returns nil
      # when the client returns no user.
      def email_for_github_username(username)
        cache_key = EMAIL_FOR_USERNAME_CACHE_KEY % username
        email = Gitlab::Cache::Import::Caching.read(cache_key)

        return email if email

        user = client.user(username)
        return unless user

        Gitlab::Cache::Import::Caching.write(cache_key, user.email)
      end

      # Reads the cached GitLab user ID for a GitHub user ID. See
      # `read_id_from_cache` for the return value.
      def cached_id_for_github_id(id)
        read_id_from_cache(ID_CACHE_KEY % id)
      end

      # Reads the cached GitLab user ID for a GitHub Email address. See
      # `read_id_from_cache` for the return value.
      def cached_id_for_github_email(email)
        read_id_from_cache(ID_FOR_EMAIL_CACHE_KEY % email)
      end

      # If importing from github.com, queries and caches the GitLab user ID for
      # a GitHub user ID, if one was found.
      #
      # When importing from Github Enterprise, do not query user by Github ID
      # since we only have users' Github ID from github.com.
      #
      # The result is written to the cache even when it is nil, so that failed
      # lookups are remembered too.
      def id_for_github_id(id)
        gitlab_id = project.github_enterprise_import? ? nil : query_id_for_github_id(id)

        Gitlab::Cache::Import::Caching.write(ID_CACHE_KEY % id, gitlab_id)
      end

      # Queries and caches the GitLab user ID for a GitHub email, if one was
      # found. A nil result is written to the cache as well.
      def id_for_github_email(email)
        gitlab_id = query_id_for_github_email(email)

        Gitlab::Cache::Import::Caching.write(ID_FOR_EMAIL_CACHE_KEY % email, gitlab_id)
      end

      # rubocop: disable CodeReuse/ActiveRecord
      def query_id_for_github_id(id)
        User.by_provider_and_extern_uid(:github, id).select(:id).first&.id
      end
      # rubocop: enable CodeReuse/ActiveRecord

      # rubocop: disable CodeReuse/ActiveRecord
      def query_id_for_github_email(email)
        User.by_any_email(email).pluck(:id).first
      end
      # rubocop: enable CodeReuse/ActiveRecord

      # Reads an ID from the cache.
      #
      # The return value is an Array with two values:
      #
      # 1. A boolean indicating if the key was present or not.
      # 2. The ID as an Integer, or nil in case no ID could be found.
      def read_id_from_cache(key)
        value = Gitlab::Cache::Import::Caching.read(key)
        number = value.to_i

        # The cache key may be empty to indicate a previously looked up user for
        # which we couldn't find an ID.
        [!value.nil?, number.positive? ? number : nil]
      end

      private

      # Returns true if no usable Email address is available. Both nil and an
      # empty value count as missing.
      def email_missing?(email)
        email.to_s.empty?
      end
    end
  end
end