summaryrefslogtreecommitdiff
path: root/lib/gitlab/github_import/user_finder.rb
blob: b8751def08ff14d66544177f1c434a032184ccaa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# frozen_string_literal: true

module Gitlab
  module GithubImport
    # Class that can be used for finding a GitLab user ID based on a GitHub user
    # ID or username.
    #
    # Any found user IDs are cached in Redis to reduce the number of SQL queries
    # executed over time. Valid keys are refreshed upon access so frequently
    # used keys stick around.
    #
    # Lookups are cached even if no ID was found to remove the need for querying
    # the database when most queries are not going to return results anyway.
    class UserFinder
      attr_reader :project, :client

      # The base cache key to use for caching user IDs for a given GitHub user
      # ID.
      ID_CACHE_KEY = 'github-import/user-finder/user-id/%s'

      # The base cache key to use for caching user IDs for a given GitHub email
      # address.
      ID_FOR_EMAIL_CACHE_KEY =
        'github-import/user-finder/id-for-email/%s'

      # The base cache key to use for caching the Email addresses of GitHub
      # usernames.
      EMAIL_FOR_USERNAME_CACHE_KEY =
        'github-import/user-finder/email-for-username/%s'

      # project - An instance of `Project`
      # client - An instance of `Gitlab::GithubImport::Client`
      def initialize(project, client)
        @project = project
        @client = client
      end

      # Returns the GitLab user ID of an object's author.
      #
      # If the object has no author ID we'll use the ID of the GitLab ghost
      # user.
      # object - An instance of `Hash` or a `Github::Representer`
      def author_id_for(object, author_key: :author)
        user_info = case author_key
                    when :actor
                      object[:actor]
                    when :assignee
                      object[:assignee]
                    when :requested_reviewer
                      object[:requested_reviewer]
                    when :review_requester
                      object[:review_requester]
                    else
                      object ? object[:author] : nil
                    end

        id = user_info ? user_id_for(user_info) : GithubImport.ghost_user_id

        if id
          [id, true]
        else
          [project.creator_id, false]
        end
      end

      # Returns the GitLab user ID of an issuable's assignee.
      def assignee_id_for(issuable)
        user_id_for(issuable[:assignee]) if issuable[:assignee]
      end

      # Returns the GitLab user ID for a GitHub user.
      #
      # user - An instance of `Gitlab::GithubImport::Representation::User` or `Hash`.
      def user_id_for(user)
        find(user[:id], user[:login]) if user.present?
      end

      # Returns the GitLab ID for the given GitHub ID or username.
      #
      # id - The ID of the GitHub user.
      # username - The username of the GitHub user.
      def find(id, username)
        email = email_for_github_username(username)
        cached, found_id = find_from_cache(id, email)

        return found_id if found_id

        # We only want to query the database if necessary. If previous lookups
        # didn't yield a user ID we won't query the database again until the
        # keys expire.
        find_id_from_database(id, email) unless cached
      end

      # Finds a user ID from the cache for a given GitHub ID or Email.
      def find_from_cache(id, email = nil)
        id_exists, id_for_github_id = cached_id_for_github_id(id)

        return [id_exists, id_for_github_id] if id_for_github_id

        # Just in case no Email address could be retrieved (for whatever reason)
        return [false] unless email

        cached_id_for_github_email(email)
      end

      # Finds a GitLab user ID from the database for a given GitHub user ID or
      # Email.
      def find_id_from_database(id, email)
        id_for_github_id(id) || id_for_github_email(email)
      end

      def email_for_github_username(username)
        cache_key = EMAIL_FOR_USERNAME_CACHE_KEY % username
        email = Gitlab::Cache::Import::Caching.read(cache_key)

        unless email
          user = client.user(username)
          email = Gitlab::Cache::Import::Caching.write(cache_key, user[:email], timeout: timeout(user[:email])) if user
        end

        email
      end

      def cached_id_for_github_id(id)
        read_id_from_cache(ID_CACHE_KEY % id)
      end

      def cached_id_for_github_email(email)
        read_id_from_cache(ID_FOR_EMAIL_CACHE_KEY % email)
      end

      # If importing from github.com, queries and caches the GitLab user ID for
      # a GitHub user ID, if one was found.
      #
      # When importing from Github Enterprise, do not query user by Github ID
      # since we only have users' Github ID from github.com.
      def id_for_github_id(id)
        gitlab_id =
          if project.github_enterprise_import?
            nil
          else
            query_id_for_github_id(id)
          end

        Gitlab::Cache::Import::Caching.write(ID_CACHE_KEY % id, gitlab_id)
      end

      # Queries and caches the GitLab user ID for a GitHub email, if one was
      # found.
      def id_for_github_email(email)
        gitlab_id = query_id_for_github_email(email) || nil

        Gitlab::Cache::Import::Caching.write(ID_FOR_EMAIL_CACHE_KEY % email, gitlab_id)
      end

      # rubocop: disable CodeReuse/ActiveRecord
      def query_id_for_github_id(id)
        User.by_provider_and_extern_uid(:github, id).select(:id).first&.id
      end
      # rubocop: enable CodeReuse/ActiveRecord

      # rubocop: disable CodeReuse/ActiveRecord
      def query_id_for_github_email(email)
        User.by_any_email(email).pick(:id)
      end
      # rubocop: enable CodeReuse/ActiveRecord

      # Reads an ID from the cache.
      #
      # The return value is an Array with two values:
      #
      # 1. A boolean indicating if the key was present or not.
      # 2. The ID as an Integer, or nil in case no ID could be found.
      def read_id_from_cache(key)
        value = Gitlab::Cache::Import::Caching.read(key)
        exists = !value.nil?
        number = value.to_i

        # The cache key may be empty to indicate a previously looked up user for
        # which we couldn't find an ID.
        [exists, number > 0 ? number : nil]
      end

      private

      def timeout(email)
        if email
          Gitlab::Cache::Import::Caching::TIMEOUT
        else
          Gitlab::Cache::Import::Caching::SHORTER_TIMEOUT
        end
      end
    end
  end
end