summaryrefslogtreecommitdiff
path: root/app/models/merge_request_diff.rb
diff options
context:
space:
mode:
Diffstat (limited to 'app/models/merge_request_diff.rb')
-rw-r--r--app/models/merge_request_diff.rb206
1 files changed, 176 insertions, 30 deletions
diff --git a/app/models/merge_request_diff.rb b/app/models/merge_request_diff.rb
index e286a4e57f2..f45bd0e03de 100644
--- a/app/models/merge_request_diff.rb
+++ b/app/models/merge_request_diff.rb
@@ -1,6 +1,6 @@
# frozen_string_literal: true
-class MergeRequestDiff < ActiveRecord::Base
+class MergeRequestDiff < ApplicationRecord
include Sortable
include Importable
include ManualInverseAssociation
@@ -12,6 +12,10 @@ class MergeRequestDiff < ActiveRecord::Base
# Don't display more than 100 commits at once
COMMITS_SAFE_SIZE = 100
+ # Applies to closed or merged MRs when determining whether to migrate their
+ # diffs to external storage
+ EXTERNAL_DIFF_CUTOFF = 7.days.freeze
+
belongs_to :merge_request
manual_inverse_association :merge_request, :merge_request_diff
@@ -22,6 +26,8 @@ class MergeRequestDiff < ActiveRecord::Base
has_many :merge_request_diff_commits, -> { order(:merge_request_diff_id, :relative_order) }
+ validates :base_commit_sha, :head_commit_sha, :start_commit_sha, sha: true
+
state_machine :state, initial: :empty do
event :clean do
transition any => :without_files
@@ -45,7 +51,86 @@ class MergeRequestDiff < ActiveRecord::Base
joins(:merge_request_diff_commits).where(merge_request_diff_commits: { sha: sha }).reorder(nil)
end
+ scope :by_project_id, -> (project_id) do
+ joins(:merge_request).where(merge_requests: { target_project_id: project_id })
+ end
+
scope :recent, -> { order(id: :desc).limit(100) }
+ scope :files_in_database, -> { where(stored_externally: [false, nil]) }
+
+ scope :not_latest_diffs, -> do
+ merge_requests = MergeRequest.arel_table
+ mr_diffs = arel_table
+
+ join_condition = merge_requests[:id].eq(mr_diffs[:merge_request_id])
+ .and(mr_diffs[:id].not_eq(merge_requests[:latest_merge_request_diff_id]))
+
+ arel_join = mr_diffs.join(merge_requests).on(join_condition)
+ joins(arel_join.join_sources)
+ end
+
+ scope :old_merged_diffs, -> (before) do
+ merge_requests = MergeRequest.arel_table
+ mr_metrics = MergeRequest::Metrics.arel_table
+ mr_diffs = arel_table
+
+ mr_join = mr_diffs
+ .join(merge_requests)
+ .on(mr_diffs[:merge_request_id].eq(merge_requests[:id]))
+
+ metrics_join_condition = mr_diffs[:merge_request_id]
+ .eq(mr_metrics[:merge_request_id])
+ .and(mr_metrics[:merged_at].not_eq(nil))
+
+ metrics_join = mr_diffs.join(mr_metrics).on(metrics_join_condition)
+
+ condition = MergeRequest.arel_table[:state].eq(:merged)
+ .and(MergeRequest::Metrics.arel_table[:merged_at].lteq(before))
+ .and(MergeRequest::Metrics.arel_table[:merged_at].not_eq(nil))
+
+ joins(metrics_join.join_sources, mr_join.join_sources).where(condition)
+ end
+
+ scope :old_closed_diffs, -> (before) do
+ condition = MergeRequest.arel_table[:state].eq(:closed)
+ .and(MergeRequest::Metrics.arel_table[:latest_closed_at].lteq(before))
+
+ joins(merge_request: :metrics).where(condition)
+ end
+
+ def self.ids_for_external_storage_migration(limit:)
+ # No point doing any work unless the feature is enabled
+ return [] unless Gitlab.config.external_diffs.enabled
+
+ case Gitlab.config.external_diffs.when
+ when 'always'
+ files_in_database.limit(limit).pluck(:id)
+ when 'outdated'
+ # Outdated is too complex to be a single SQL query, so split into three
+ before = EXTERNAL_DIFF_CUTOFF.ago
+
+ ids = files_in_database
+ .old_merged_diffs(before)
+ .limit(limit)
+ .pluck(:id)
+
+ return ids if ids.size >= limit
+
+ ids += files_in_database
+ .old_closed_diffs(before)
+ .limit(limit - ids.size)
+ .pluck(:id)
+
+ return ids if ids.size >= limit
+
+ ids + files_in_database
+ .not_latest_diffs
+ .limit(limit - ids.size)
+ .pluck(:id)
+ else
+ []
+ end
+ end
mount_uploader :external_diff, ExternalDiffUploader
@@ -53,7 +138,7 @@ class MergeRequestDiff < ActiveRecord::Base
# It allows you to override variables like head_commit_sha before getting diff.
after_create :save_git_content, unless: :importing?
- after_save :update_external_diff_store, if: :external_diff_changed?
+ after_save :update_external_diff_store, if: -> { !importing? && saved_change_to_external_diff? }
def self.find_by_diff_refs(diff_refs)
find_by(start_commit_sha: diff_refs.start_sha, head_commit_sha: diff_refs.head_sha, base_commit_sha: diff_refs.base_sha)
@@ -73,7 +158,14 @@ class MergeRequestDiff < ActiveRecord::Base
ensure_commit_shas
save_commits
save_diffs
+
+ # Another set of `after_save` hooks will be called here when we update the record
save
+ # We need to reset so that dirty tracking is reset when running the original set
+ # of `after_save` hooks that come after this `after_create` hook. Otherwise, the
+ # hooks that run when an attribute was changed are run twice.
+ reset
+
keep_around_commits
end
@@ -267,7 +359,7 @@ class MergeRequestDiff < ActiveRecord::Base
has_attribute?(:external_diff_store)
end
- def external_diff_changed?
+ def saved_change_to_external_diff?
super if has_attribute?(:external_diff)
end
@@ -284,32 +376,39 @@ class MergeRequestDiff < ActiveRecord::Base
return yield(@external_diff_file) if @external_diff_file
external_diff.open do |file|
- begin
- @external_diff_file = file
+ @external_diff_file = file
- yield(@external_diff_file)
- ensure
- @external_diff_file = nil
- end
+ yield(@external_diff_file)
+ ensure
+ @external_diff_file = nil
end
end
- private
+ # Transactionally migrate the current merge_request_diff_files entries to
+ # external storage. If external storage isn't an option for this diff, the
+ # method is a no-op.
+ def migrate_files_to_external_storage!
+ return if stored_externally? || !use_external_diff? || merge_request_diff_files.count == 0
- def create_merge_request_diff_files(diffs)
- rows =
- if has_attribute?(:external_diff) && Gitlab.config.external_diffs.enabled
- build_external_merge_request_diff_files(diffs)
- else
- build_merge_request_diff_files(diffs)
- end
+ rows = build_merge_request_diff_files(merge_request_diff_files)
- # Faster inserts
- Gitlab::Database.bulk_insert('merge_request_diff_files', rows)
+ transaction do
+ MergeRequestDiffFile.where(merge_request_diff_id: id).delete_all
+ create_merge_request_diff_files(rows)
+ save!
+ end
+
+ merge_request_diff_files.reset
end
- def build_external_merge_request_diff_files(diffs)
- rows = build_merge_request_diff_files(diffs)
+ private
+
+ def encode_in_base64?(diff_text)
+ (diff_text.encoding == Encoding::BINARY && !diff_text.ascii_only?) ||
+ diff_text.include?("\0")
+ end
+
+ def build_external_merge_request_diff_files(rows)
tempfile = build_external_diff_tempfile(rows)
self.external_diff = tempfile
@@ -320,16 +419,21 @@ class MergeRequestDiff < ActiveRecord::Base
tempfile&.unlink
end
+ def create_merge_request_diff_files(rows)
+ rows = build_external_merge_request_diff_files(rows) if use_external_diff?
+
+ # Faster inserts
+ Gitlab::Database.bulk_insert('merge_request_diff_files', rows)
+ end
+
def build_external_diff_tempfile(rows)
Tempfile.open(external_diff.filename) do |file|
- rows.inject(0) do |offset, row|
+ rows.each do |row|
data = row.delete(:diff)
- row[:external_diff_offset] = offset
- row[:external_diff_size] = data.size
+ row[:external_diff_offset] = file.pos
+ row[:external_diff_size] = data.bytesize
file.write(data)
-
- offset + data.size
end
file
@@ -348,7 +452,7 @@ class MergeRequestDiff < ActiveRecord::Base
diff_hash.tap do |hash|
diff_text = hash[:diff]
- if diff_text.encoding == Encoding::BINARY && !diff_text.ascii_only?
+ if encode_in_base64?(diff_text)
hash[:binary] = true
hash[:diff] = [diff_text].pack('m0')
end
@@ -356,6 +460,47 @@ class MergeRequestDiff < ActiveRecord::Base
end
end
+ def use_external_diff?
+ return false unless has_attribute?(:external_diff)
+ return false unless Gitlab.config.external_diffs.enabled
+
+ case Gitlab.config.external_diffs.when
+ when 'always'
+ true
+ when 'outdated'
+ outdated_by_merge? || outdated_by_closure? || old_version?
+ else
+ false # Disable external diffs if misconfigured
+ end
+ end
+
+ def outdated_by_merge?
+ return false unless merge_request&.metrics&.merged_at
+
+ merge_request.merged? && merge_request.metrics.merged_at < EXTERNAL_DIFF_CUTOFF.ago
+ end
+
+ def outdated_by_closure?
+ return false unless merge_request&.metrics&.latest_closed_at
+
+ merge_request.closed? && merge_request.metrics.latest_closed_at < EXTERNAL_DIFF_CUTOFF.ago
+ end
+
+ # We can't rely on `merge_request.latest_merge_request_diff_id` because that
+ # may have been changed in `save_git_content` without being reflected in
+ # the association's instance. This query is always subject to races, but
+ # the worst case is that we *don't* make a diff external when we could. The
+ # background worker will make it external at a later date.
+ def old_version?
+ latest_id = MergeRequest
+ .where(id: merge_request_id)
+ .limit(1)
+ .pluck(:latest_merge_request_diff_id)
+ .first
+
+ self.id != latest_id
+ end
+
def load_diffs(options)
# Ensure all diff files operate on the same external diff file instance if
# present. This reduces file open/close overhead.
@@ -389,7 +534,8 @@ class MergeRequestDiff < ActiveRecord::Base
if diff_collection.any?
new_attributes[:state] = :collected
- create_merge_request_diff_files(diff_collection)
+ rows = build_merge_request_diff_files(diff_collection)
+ create_merge_request_diff_files(rows)
end
# Set our state to 'overflow' to make the #empty? and #collected?
@@ -406,10 +552,10 @@ class MergeRequestDiff < ActiveRecord::Base
def save_commits
MergeRequestDiffCommit.create_bulk(self.id, compare.commits.reverse)
- # merge_request_diff_commits.reload is preferred way to reload associated
+ # merge_request_diff_commits.reset is preferred way to reload associated
# objects but it returns cached result for some reason in this case
# we can circumvent that by specifying that we need an uncached reload
- commits = self.class.uncached { merge_request_diff_commits.reload }
+ commits = self.class.uncached { merge_request_diff_commits.reset }
self.commits_count = commits.size
end