summaryrefslogtreecommitdiff
path: root/app/models/merge_request_diff_commit.rb
blob: fc08dd4d9c8e544e78409205a7c38b71f9d9ed4f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# frozen_string_literal: true

class MergeRequestDiffCommit < ApplicationRecord
  extend SuppressCompositePrimaryKeyWarning

  include BulkInsertSafe
  include ShaAttribute
  include CachedCommit
  include IgnorableColumns
  include FromUnion

  ignore_column %i[author_name author_email committer_name committer_email],
    remove_with: '14.6',
    remove_after: '2021-11-22'

  belongs_to :merge_request_diff

  # This relation is called `commit_author` and not `author`, as the project
  # import/export logic treats relations named `author` as instances of the
  # `User` class.
  #
  # NOTE: these columns are _not_ indexed, nor do they use foreign keys.
  #
  # This is deliberate, as creating these indexes on GitLab.com takes a _very_
  # long time. In addition, there's no real need for them either based on how
  # this data is used.
  #
  # For more information, refer to the following:
  #
  # - https://gitlab.com/gitlab-com/gl-infra/production/-/issues/5038#note_614592881
  # - https://gitlab.com/gitlab-org/gitlab/-/merge_requests/63669
  belongs_to :commit_author, class_name: 'MergeRequest::DiffCommitUser'
  belongs_to :committer, class_name: 'MergeRequest::DiffCommitUser'

  sha_attribute :sha
  alias_attribute :id, :sha

  attribute :trailers, :ind_jsonb
  validates :trailers, json_schema: { filename: 'git_trailers' }

  scope :with_users, -> { preload(:commit_author, :committer) }

  # A list of keys of which their values need to be trimmed before they can be
  # inserted into the merge_request_diff_commit_users table.
  TRIM_USER_KEYS =
    %i[author_name author_email committer_name committer_email].freeze

  # Deprecated; use `bulk_insert!` from `BulkInsertSafe` mixin instead.
  # cf. https://gitlab.com/gitlab-org/gitlab/issues/207989 for progress
  def self.create_bulk(merge_request_diff_id, commits)
    commit_hashes, user_tuples = prepare_commits_for_bulk_insert(commits)
    users = MergeRequest::DiffCommitUser.bulk_find_or_create(user_tuples)

    rows = commit_hashes.map.with_index do |commit_hash, index|
      sha = commit_hash.delete(:id)
      author = users[[commit_hash[:author_name], commit_hash[:author_email]]]
      committer =
        users[[commit_hash[:committer_name], commit_hash[:committer_email]]]

      # These fields are only used to determine the author/committer IDs, we
      # don't store them in the DB.
      commit_hash = commit_hash
        .except(:author_name, :author_email, :committer_name, :committer_email)

      commit_hash.merge(
        commit_author_id: author.id,
        committer_id: committer.id,
        merge_request_diff_id: merge_request_diff_id,
        relative_order: index,
        sha: Gitlab::Database::ShaAttribute.serialize(sha), # rubocop:disable Cop/ActiveRecordSerialize
        authored_date: Gitlab::Database.sanitize_timestamp(commit_hash[:authored_date]),
        committed_date: Gitlab::Database.sanitize_timestamp(commit_hash[:committed_date]),
        trailers: Gitlab::Json.dump(commit_hash.fetch(:trailers, {}))
      )
    end

    ApplicationRecord.legacy_bulk_insert(self.table_name, rows) # rubocop:disable Gitlab/BulkInsert
  end

  def self.prepare_commits_for_bulk_insert(commits)
    user_tuples = Set.new
    hashes = commits.map do |commit|
      hash = commit.to_hash.except(:parent_ids, :referenced_by)

      TRIM_USER_KEYS.each do |key|
        hash[key] = MergeRequest::DiffCommitUser.prepare(hash[key])
      end

      user_tuples << [hash[:author_name], hash[:author_email]]
      user_tuples << [hash[:committer_name], hash[:committer_email]]

      hash
    end

    [hashes, user_tuples]
  end

  def self.oldest_merge_request_id_per_commit(project_id, shas)
    # This method is defined here and not on MergeRequest, otherwise the SHA
    # values used in the WHERE below won't be encoded correctly.
    select(['merge_request_diff_commits.sha AS sha', 'min(merge_requests.id) AS merge_request_id'])
      .joins(:merge_request_diff)
      .joins(
        'INNER JOIN merge_requests ' \
          'ON merge_requests.latest_merge_request_diff_id = merge_request_diffs.id'
      )
      .where(sha: shas)
      .where(
        merge_requests: {
          target_project_id: project_id,
          state_id: MergeRequest.available_states[:merged]
        }
      )
      .group(:sha)
  end

  def author_name
    commit_author&.name
  end

  def author_email
    commit_author&.email
  end

  def committer_name
    committer&.name
  end

  def committer_email
    committer&.email
  end
end