summaryrefslogtreecommitdiff
path: root/app/models/merge_request_diff.rb
diff options
context:
space:
mode:
authorNick Thomas <nick@gitlab.com>2019-02-20 15:35:57 +0000
committerNick Thomas <nick@gitlab.com>2019-03-27 16:51:33 +0000
commit0e831b0b692f2988d3c84fc01a463b08afec05ad (patch)
tree3fdcb423db62141b2db2d2cc3f39986fb929c8af /app/models/merge_request_diff.rb
parent98824f3e97e24a5d6cb0688167bc8411a74739fc (diff)
downloadgitlab-ce-0e831b0b692f2988d3c84fc01a463b08afec05ad.tar.gz
Allow external diffs to be used conditionally
Since external diffs are likely to be a bit slower than in-database ones, add a mode that makes diffs external after they've been obsoleted by events. This should strike a balance between performance and disk space. A background cron drives the majority of migrations, since diffs become outdated through user actions.
Diffstat (limited to 'app/models/merge_request_diff.rb')
-rw-r--r--app/models/merge_request_diff.rb172
1 files changed, 151 insertions, 21 deletions
diff --git a/app/models/merge_request_diff.rb b/app/models/merge_request_diff.rb
index 98db1bf7de7..2143571d1d4 100644
--- a/app/models/merge_request_diff.rb
+++ b/app/models/merge_request_diff.rb
@@ -12,6 +12,10 @@ class MergeRequestDiff < ActiveRecord::Base
# Don't display more than 100 commits at once
COMMITS_SAFE_SIZE = 100
+ # Applies to closed or merged MRs when determining whether to migrate their
+ # diffs to external storage
+ EXTERNAL_DIFF_CUTOFF = 7.days.freeze
+
belongs_to :merge_request
manual_inverse_association :merge_request, :merge_request_diff
@@ -48,6 +52,81 @@ class MergeRequestDiff < ActiveRecord::Base
end
scope :recent, -> { order(id: :desc).limit(100) }
+ scope :files_in_database, -> { where(stored_externally: [false, nil]) }
+
+ scope :not_latest_diffs, -> do
+ merge_requests = MergeRequest.arel_table
+ mr_diffs = arel_table
+
+ join_condition = merge_requests[:id].eq(mr_diffs[:merge_request_id])
+ .and(mr_diffs[:id].not_eq(merge_requests[:latest_merge_request_diff_id]))
+
+ arel_join = mr_diffs.join(merge_requests).on(join_condition)
+ joins(arel_join.join_sources)
+ end
+
+ scope :old_merged_diffs, -> (before) do
+ merge_requests = MergeRequest.arel_table
+ mr_metrics = MergeRequest::Metrics.arel_table
+ mr_diffs = arel_table
+
+ mr_join = mr_diffs
+ .join(merge_requests)
+ .on(mr_diffs[:merge_request_id].eq(merge_requests[:id]))
+
+ metrics_join_condition = mr_diffs[:merge_request_id]
+ .eq(mr_metrics[:merge_request_id])
+ .and(mr_metrics[:merged_at].not_eq(nil))
+
+ metrics_join = mr_diffs.join(mr_metrics).on(metrics_join_condition)
+
+ condition = MergeRequest.arel_table[:state].eq(:merged)
+ .and(MergeRequest::Metrics.arel_table[:merged_at].lteq(before))
+ .and(MergeRequest::Metrics.arel_table[:merged_at].not_eq(nil))
+
+ joins(metrics_join.join_sources, mr_join.join_sources).where(condition)
+ end
+
+ scope :old_closed_diffs, -> (before) do
+ condition = MergeRequest.arel_table[:state].eq(:closed)
+ .and(MergeRequest::Metrics.arel_table[:latest_closed_at].lteq(before))
+
+ joins(merge_request: :metrics).where(condition)
+ end
+
+ def self.ids_for_external_storage_migration(limit:)
+ # No point doing any work unless the feature is enabled
+ return [] unless Gitlab.config.external_diffs.enabled
+
+ case Gitlab.config.external_diffs.when
+ when 'always'
+ files_in_database.limit(limit).pluck(:id)
+ when 'outdated'
+ # Outdated is too complex to be a single SQL query, so split into three
+ before = EXTERNAL_DIFF_CUTOFF.ago
+
+ ids = files_in_database
+ .old_merged_diffs(before)
+ .limit(limit)
+ .pluck(:id)
+
+ return ids if ids.size >= limit
+
+ ids += files_in_database
+ .old_closed_diffs(before)
+ .limit(limit - ids.size)
+ .pluck(:id)
+
+ return ids if ids.size >= limit
+
+ ids + files_in_database
+ .not_latest_diffs
+ .limit(limit - ids.size)
+ .pluck(:id)
+ else
+ []
+ end
+ end
mount_uploader :external_diff, ExternalDiffUploader
@@ -55,7 +134,7 @@ class MergeRequestDiff < ActiveRecord::Base
# It allows you to override variables like head_commit_sha before getting diff.
after_create :save_git_content, unless: :importing?
- after_save :update_external_diff_store, if: :external_diff_changed?
+ after_save :update_external_diff_store, if: -> { !importing? && external_diff_changed? }
def self.find_by_diff_refs(diff_refs)
find_by(start_commit_sha: diff_refs.start_sha, head_commit_sha: diff_refs.head_sha, base_commit_sha: diff_refs.base_sha)
@@ -294,6 +373,23 @@ class MergeRequestDiff < ActiveRecord::Base
end
end
+ # Transactionally migrate the current merge_request_diff_files entries to
+ # external storage. If external storage isn't an option for this diff, the
+ # method is a no-op.
+ def migrate_files_to_external_storage!
+ return if stored_externally? || !use_external_diff? || merge_request_diff_files.count == 0
+
+ rows = build_merge_request_diff_files(merge_request_diff_files)
+
+ transaction do
+ MergeRequestDiffFile.where(merge_request_diff_id: id).delete_all
+ create_merge_request_diff_files(rows)
+ save!
+ end
+
+ merge_request_diff_files.reload
+ end
+
private
def encode_in_base64?(diff_text)
@@ -301,20 +397,7 @@ class MergeRequestDiff < ActiveRecord::Base
diff_text.include?("\0")
end
- def create_merge_request_diff_files(diffs)
- rows =
- if has_attribute?(:external_diff) && Gitlab.config.external_diffs.enabled
- build_external_merge_request_diff_files(diffs)
- else
- build_merge_request_diff_files(diffs)
- end
-
- # Faster inserts
- Gitlab::Database.bulk_insert('merge_request_diff_files', rows)
- end
-
- def build_external_merge_request_diff_files(diffs)
- rows = build_merge_request_diff_files(diffs)
+ def build_external_merge_request_diff_files(rows)
tempfile = build_external_diff_tempfile(rows)
self.external_diff = tempfile
@@ -325,16 +408,21 @@ class MergeRequestDiff < ActiveRecord::Base
tempfile&.unlink
end
+ def create_merge_request_diff_files(rows)
+ rows = build_external_merge_request_diff_files(rows) if use_external_diff?
+
+ # Faster inserts
+ Gitlab::Database.bulk_insert('merge_request_diff_files', rows)
+ end
+
def build_external_diff_tempfile(rows)
Tempfile.open(external_diff.filename) do |file|
- rows.inject(0) do |offset, row|
+ rows.each do |row|
data = row.delete(:diff)
- row[:external_diff_offset] = offset
- row[:external_diff_size] = data.size
+ row[:external_diff_offset] = file.pos
+ row[:external_diff_size] = data.bytesize
file.write(data)
-
- offset + data.size
end
file
@@ -361,6 +449,47 @@ class MergeRequestDiff < ActiveRecord::Base
end
end
+ def use_external_diff?
+ return false unless has_attribute?(:external_diff)
+ return false unless Gitlab.config.external_diffs.enabled
+
+ case Gitlab.config.external_diffs.when
+ when 'always'
+ true
+ when 'outdated'
+ outdated_by_merge? || outdated_by_closure? || old_version?
+ else
+ false # Disable external diffs if misconfigured
+ end
+ end
+
+ def outdated_by_merge?
+ return false unless merge_request&.metrics&.merged_at
+
+ merge_request.merged? && merge_request.metrics.merged_at < EXTERNAL_DIFF_CUTOFF.ago
+ end
+
+ def outdated_by_closure?
+ return false unless merge_request&.metrics&.latest_closed_at
+
+ merge_request.closed? && merge_request.metrics.latest_closed_at < EXTERNAL_DIFF_CUTOFF.ago
+ end
+
+ # We can't rely on `merge_request.latest_merge_request_diff_id` because that
+ # may have been changed in `save_git_content` without being reflected in
+ # the association's instance. This query is always subject to races, but
+ # the worst case is that we *don't* make a diff external when we could. The
+ # background worker will make it external at a later date.
+ def old_version?
+ latest_id = MergeRequest
+ .where(id: merge_request_id)
+ .limit(1)
+ .pluck(:latest_merge_request_diff_id)
+ .first
+
+ self.id != latest_id
+ end
+
def load_diffs(options)
# Ensure all diff files operate on the same external diff file instance if
# present. This reduces file open/close overhead.
@@ -394,7 +523,8 @@ class MergeRequestDiff < ActiveRecord::Base
if diff_collection.any?
new_attributes[:state] = :collected
- create_merge_request_diff_files(diff_collection)
+ rows = build_merge_request_diff_files(diff_collection)
+ create_merge_request_diff_files(rows)
end
# Set our state to 'overflow' to make the #empty? and #collected?