summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Thomas <nick@gitlab.com>2019-02-20 15:35:57 +0000
committerNick Thomas <nick@gitlab.com>2019-03-27 16:51:33 +0000
commit0e831b0b692f2988d3c84fc01a463b08afec05ad (patch)
tree3fdcb423db62141b2db2d2cc3f39986fb929c8af
parent98824f3e97e24a5d6cb0688167bc8411a74739fc (diff)
downloadgitlab-ce-0e831b0b692f2988d3c84fc01a463b08afec05ad.tar.gz
Allow external diffs to be used conditionally
Since external diffs are likely to be a bit slower than in-database ones, add a mode that makes diffs external after they've been obsoleted by events. This should strike a balance between performance and disk space. A background cron drives the majority of migrations, since diffs become outdated through user actions.
-rw-r--r--app/models/merge_request_diff.rb172
-rw-r--r--app/services/merge_requests/migrate_external_diffs_service.rb23
-rw-r--r--app/workers/all_queues.yml2
-rw-r--r--app/workers/migrate_external_diffs_worker.rb12
-rw-r--r--app/workers/schedule_migrate_external_diffs_worker.rb14
-rw-r--r--changelogs/unreleased/54670-external-diffs-when-outdated.yml5
-rw-r--r--config/gitlab.yml.example8
-rw-r--r--config/initializers/1_settings.rb5
-rw-r--r--config/sidekiq_queues.yml1
-rw-r--r--db/migrate/20190222051615_add_indexes_for_merge_request_diffs_query.rb42
-rw-r--r--db/schema.rb3
-rw-r--r--doc/administration/merge_request_diffs.md44
-rw-r--r--spec/factories/merge_requests.rb16
-rw-r--r--spec/models/merge_request_diff_spec.rb147
-rw-r--r--spec/services/merge_requests/migrate_external_diffs_service_spec.rb43
-rw-r--r--spec/workers/migrate_external_diffs_worker_spec.rb25
-rw-r--r--spec/workers/schedule_migrate_external_diffs_worker_spec.rb25
17 files changed, 563 insertions, 24 deletions
diff --git a/app/models/merge_request_diff.rb b/app/models/merge_request_diff.rb
index 98db1bf7de7..2143571d1d4 100644
--- a/app/models/merge_request_diff.rb
+++ b/app/models/merge_request_diff.rb
@@ -12,6 +12,10 @@ class MergeRequestDiff < ActiveRecord::Base
# Don't display more than 100 commits at once
COMMITS_SAFE_SIZE = 100
+ # Applies to closed or merged MRs when determining whether to migrate their
+ # diffs to external storage
+ EXTERNAL_DIFF_CUTOFF = 7.days.freeze
+
belongs_to :merge_request
manual_inverse_association :merge_request, :merge_request_diff
@@ -48,6 +52,81 @@ class MergeRequestDiff < ActiveRecord::Base
end
scope :recent, -> { order(id: :desc).limit(100) }
+ scope :files_in_database, -> { where(stored_externally: [false, nil]) }
+
+ scope :not_latest_diffs, -> do
+ merge_requests = MergeRequest.arel_table
+ mr_diffs = arel_table
+
+ join_condition = merge_requests[:id].eq(mr_diffs[:merge_request_id])
+ .and(mr_diffs[:id].not_eq(merge_requests[:latest_merge_request_diff_id]))
+
+ arel_join = mr_diffs.join(merge_requests).on(join_condition)
+ joins(arel_join.join_sources)
+ end
+
+ scope :old_merged_diffs, -> (before) do
+ merge_requests = MergeRequest.arel_table
+ mr_metrics = MergeRequest::Metrics.arel_table
+ mr_diffs = arel_table
+
+ mr_join = mr_diffs
+ .join(merge_requests)
+ .on(mr_diffs[:merge_request_id].eq(merge_requests[:id]))
+
+ metrics_join_condition = mr_diffs[:merge_request_id]
+ .eq(mr_metrics[:merge_request_id])
+ .and(mr_metrics[:merged_at].not_eq(nil))
+
+ metrics_join = mr_diffs.join(mr_metrics).on(metrics_join_condition)
+
+ condition = MergeRequest.arel_table[:state].eq(:merged)
+ .and(MergeRequest::Metrics.arel_table[:merged_at].lteq(before))
+ .and(MergeRequest::Metrics.arel_table[:merged_at].not_eq(nil))
+
+ joins(metrics_join.join_sources, mr_join.join_sources).where(condition)
+ end
+
+ scope :old_closed_diffs, -> (before) do
+ condition = MergeRequest.arel_table[:state].eq(:closed)
+ .and(MergeRequest::Metrics.arel_table[:latest_closed_at].lteq(before))
+
+ joins(merge_request: :metrics).where(condition)
+ end
+
+ def self.ids_for_external_storage_migration(limit:)
+ # No point doing any work unless the feature is enabled
+ return [] unless Gitlab.config.external_diffs.enabled
+
+ case Gitlab.config.external_diffs.when
+ when 'always'
+ files_in_database.limit(limit).pluck(:id)
+ when 'outdated'
+ # Outdated is too complex to be a single SQL query, so split into three
+ before = EXTERNAL_DIFF_CUTOFF.ago
+
+ ids = files_in_database
+ .old_merged_diffs(before)
+ .limit(limit)
+ .pluck(:id)
+
+ return ids if ids.size >= limit
+
+ ids += files_in_database
+ .old_closed_diffs(before)
+ .limit(limit - ids.size)
+ .pluck(:id)
+
+ return ids if ids.size >= limit
+
+ ids + files_in_database
+ .not_latest_diffs
+ .limit(limit - ids.size)
+ .pluck(:id)
+ else
+ []
+ end
+ end
mount_uploader :external_diff, ExternalDiffUploader
@@ -55,7 +134,7 @@ class MergeRequestDiff < ActiveRecord::Base
# It allows you to override variables like head_commit_sha before getting diff.
after_create :save_git_content, unless: :importing?
- after_save :update_external_diff_store, if: :external_diff_changed?
+ after_save :update_external_diff_store, if: -> { !importing? && external_diff_changed? }
def self.find_by_diff_refs(diff_refs)
find_by(start_commit_sha: diff_refs.start_sha, head_commit_sha: diff_refs.head_sha, base_commit_sha: diff_refs.base_sha)
@@ -294,6 +373,23 @@ class MergeRequestDiff < ActiveRecord::Base
end
end
+ # Transactionally migrate the current merge_request_diff_files entries to
+ # external storage. If external storage isn't an option for this diff, the
+ # method is a no-op.
+ def migrate_files_to_external_storage!
+ return if stored_externally? || !use_external_diff? || merge_request_diff_files.count == 0
+
+ rows = build_merge_request_diff_files(merge_request_diff_files)
+
+ transaction do
+ MergeRequestDiffFile.where(merge_request_diff_id: id).delete_all
+ create_merge_request_diff_files(rows)
+ save!
+ end
+
+ merge_request_diff_files.reload
+ end
+
private
def encode_in_base64?(diff_text)
@@ -301,20 +397,7 @@ class MergeRequestDiff < ActiveRecord::Base
diff_text.include?("\0")
end
- def create_merge_request_diff_files(diffs)
- rows =
- if has_attribute?(:external_diff) && Gitlab.config.external_diffs.enabled
- build_external_merge_request_diff_files(diffs)
- else
- build_merge_request_diff_files(diffs)
- end
-
- # Faster inserts
- Gitlab::Database.bulk_insert('merge_request_diff_files', rows)
- end
-
- def build_external_merge_request_diff_files(diffs)
- rows = build_merge_request_diff_files(diffs)
+ def build_external_merge_request_diff_files(rows)
tempfile = build_external_diff_tempfile(rows)
self.external_diff = tempfile
@@ -325,16 +408,21 @@ class MergeRequestDiff < ActiveRecord::Base
tempfile&.unlink
end
+ def create_merge_request_diff_files(rows)
+ rows = build_external_merge_request_diff_files(rows) if use_external_diff?
+
+ # Faster inserts
+ Gitlab::Database.bulk_insert('merge_request_diff_files', rows)
+ end
+
def build_external_diff_tempfile(rows)
Tempfile.open(external_diff.filename) do |file|
- rows.inject(0) do |offset, row|
+ rows.each do |row|
data = row.delete(:diff)
- row[:external_diff_offset] = offset
- row[:external_diff_size] = data.size
+ row[:external_diff_offset] = file.pos
+ row[:external_diff_size] = data.bytesize
file.write(data)
-
- offset + data.size
end
file
@@ -361,6 +449,47 @@ class MergeRequestDiff < ActiveRecord::Base
end
end
+ def use_external_diff?
+ return false unless has_attribute?(:external_diff)
+ return false unless Gitlab.config.external_diffs.enabled
+
+ case Gitlab.config.external_diffs.when
+ when 'always'
+ true
+ when 'outdated'
+ outdated_by_merge? || outdated_by_closure? || old_version?
+ else
+ false # Disable external diffs if misconfigured
+ end
+ end
+
+ def outdated_by_merge?
+ return false unless merge_request&.metrics&.merged_at
+
+ merge_request.merged? && merge_request.metrics.merged_at < EXTERNAL_DIFF_CUTOFF.ago
+ end
+
+ def outdated_by_closure?
+ return false unless merge_request&.metrics&.latest_closed_at
+
+ merge_request.closed? && merge_request.metrics.latest_closed_at < EXTERNAL_DIFF_CUTOFF.ago
+ end
+
+ # We can't rely on `merge_request.latest_merge_request_diff_id` because that
+ # may have been changed in `save_git_content` without being reflected in
+ # the association's instance. This query is always subject to races, but
+ # the worst case is that we *don't* make a diff external when we could. The
+ # background worker will make it external at a later date.
+ def old_version?
+ latest_id = MergeRequest
+ .where(id: merge_request_id)
+ .limit(1)
+ .pluck(:latest_merge_request_diff_id)
+ .first
+
+ self.id != latest_id
+ end
+
def load_diffs(options)
# Ensure all diff files operate on the same external diff file instance if
# present. This reduces file open/close overhead.
@@ -394,7 +523,8 @@ class MergeRequestDiff < ActiveRecord::Base
if diff_collection.any?
new_attributes[:state] = :collected
- create_merge_request_diff_files(diff_collection)
+ rows = build_merge_request_diff_files(diff_collection)
+ create_merge_request_diff_files(rows)
end
# Set our state to 'overflow' to make the #empty? and #collected?
diff --git a/app/services/merge_requests/migrate_external_diffs_service.rb b/app/services/merge_requests/migrate_external_diffs_service.rb
new file mode 100644
index 00000000000..16050244637
--- /dev/null
+++ b/app/services/merge_requests/migrate_external_diffs_service.rb
@@ -0,0 +1,23 @@
+# frozen_string_literal: true
+
+module MergeRequests
+ class MigrateExternalDiffsService < ::BaseService
+ MAX_JOBS = 1000.freeze
+
+ attr_reader :diff
+
+ def self.enqueue!
+ ids = MergeRequestDiff.ids_for_external_storage_migration(limit: MAX_JOBS)
+
+ MigrateExternalDiffsWorker.bulk_perform_async(ids.map { |id| [id] })
+ end
+
+ def initialize(merge_request_diff)
+ @diff = merge_request_diff
+ end
+
+ def execute
+ diff.migrate_files_to_external_storage!
+ end
+ end
+end
diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml
index 6ebd756d3da..3e8c2a1209a 100644
--- a/app/workers/all_queues.yml
+++ b/app/workers/all_queues.yml
@@ -21,6 +21,7 @@
- cronjob:trending_projects
- cronjob:issue_due_scheduler
- cronjob:prune_web_hook_logs
+- cronjob:schedule_migrate_external_diffs
- gcp_cluster:cluster_install_app
- gcp_cluster:cluster_patch_app
@@ -119,6 +120,7 @@
- invalid_gpg_signature_update
- irker
- merge
+- migrate_external_diffs
- namespaceless_project_destroy
- new_issue
- new_merge_request
diff --git a/app/workers/migrate_external_diffs_worker.rb b/app/workers/migrate_external_diffs_worker.rb
new file mode 100644
index 00000000000..debac97af2c
--- /dev/null
+++ b/app/workers/migrate_external_diffs_worker.rb
@@ -0,0 +1,12 @@
+# frozen_string_literal: false
+
+class MigrateExternalDiffsWorker
+ include ApplicationWorker
+
+ def perform(merge_request_diff_id)
+ diff = MergeRequestDiff.find_by_id(merge_request_diff_id)
+ return unless diff
+
+ MergeRequests::MigrateExternalDiffsService.new(diff).execute
+ end
+end
diff --git a/app/workers/schedule_migrate_external_diffs_worker.rb b/app/workers/schedule_migrate_external_diffs_worker.rb
new file mode 100644
index 00000000000..70910f7ca04
--- /dev/null
+++ b/app/workers/schedule_migrate_external_diffs_worker.rb
@@ -0,0 +1,14 @@
+# frozen_string_literal: false
+
+class ScheduleMigrateExternalDiffsWorker
+ include ApplicationWorker
+ include CronjobQueue
+ include Gitlab::ExclusiveLeaseHelpers
+
+ def perform
+ in_lock(self.class.name.underscore, ttl: 2.hours, retries: 0) do
+ MergeRequests::MigrateExternalDiffsService.enqueue!
+ end
+ rescue Gitlab::ExclusiveLeaseHelpers::FailedToObtainLockError
+ end
+end
diff --git a/changelogs/unreleased/54670-external-diffs-when-outdated.yml b/changelogs/unreleased/54670-external-diffs-when-outdated.yml
new file mode 100644
index 00000000000..2a0b9e75cb4
--- /dev/null
+++ b/changelogs/unreleased/54670-external-diffs-when-outdated.yml
@@ -0,0 +1,5 @@
+---
+title: 'Allow external diffs to be used conditionally'
+merge_request: 25432
+author:
+type: added
diff --git a/config/gitlab.yml.example b/config/gitlab.yml.example
index eba7d2b9fb7..8d9b6624995 100644
--- a/config/gitlab.yml.example
+++ b/config/gitlab.yml.example
@@ -301,6 +301,10 @@ production: &base
pages_domain_verification_cron_worker:
cron: "*/15 * * * *"
+ # Periodically migrate diffs from the database to external storage
+ schedule_migrate_external_diffs_worker:
+ cron: "15 * * * *"
+
registry:
# enabled: true
# host: registry.example.com
@@ -787,6 +791,10 @@ test:
enabled: true
external_diffs:
enabled: false
+ # Diffs may be `always` external (the default), or they can be made external
+ # after they have become `outdated` (i.e., the MR is closed or a new version
+ # has been pushed).
+ # when: always
# The location where external diffs are stored (default: shared/external-diffs).
# storage_path: shared/external-diffs
object_store:
diff --git a/config/initializers/1_settings.rb b/config/initializers/1_settings.rb
index 99bdf5a95c2..01ffcade931 100644
--- a/config/initializers/1_settings.rb
+++ b/config/initializers/1_settings.rb
@@ -238,6 +238,7 @@ Settings.pages.admin['certificate'] ||= ''
#
Settings['external_diffs'] ||= Settingslogic.new({})
Settings.external_diffs['enabled'] = false if Settings.external_diffs['enabled'].nil?
+Settings.external_diffs['when'] = 'always' if Settings.external_diffs['when'].nil?
Settings.external_diffs['storage_path'] = Settings.absolute(Settings.external_diffs['storage_path'] || File.join(Settings.shared['path'], 'external-diffs'))
Settings.external_diffs['object_store'] = ObjectStoreSettings.parse(Settings.external_diffs['object_store'])
@@ -344,6 +345,10 @@ Settings.cron_jobs['prune_web_hook_logs_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['prune_web_hook_logs_worker']['cron'] ||= '0 */1 * * *'
Settings.cron_jobs['prune_web_hook_logs_worker']['job_class'] = 'PruneWebHookLogsWorker'
+Settings.cron_jobs['schedule_migrate_external_diffs_worker'] ||= Settingslogic.new({})
+Settings.cron_jobs['schedule_migrate_external_diffs_worker']['cron'] ||= '15 * * * *'
+Settings.cron_jobs['schedule_migrate_external_diffs_worker']['job_class'] = 'ScheduleMigrateExternalDiffsWorker'
+
#
# Sidekiq
#
diff --git a/config/sidekiq_queues.yml b/config/sidekiq_queues.yml
index cef123b86ae..2dc0da00919 100644
--- a/config/sidekiq_queues.yml
+++ b/config/sidekiq_queues.yml
@@ -89,3 +89,4 @@
- [project_daily_statistics, 1]
- [import_issues_csv, 2]
- [chat_notification, 2]
+ - [migrate_external_diffs, 1]
diff --git a/db/migrate/20190222051615_add_indexes_for_merge_request_diffs_query.rb b/db/migrate/20190222051615_add_indexes_for_merge_request_diffs_query.rb
new file mode 100644
index 00000000000..0048268ca6f
--- /dev/null
+++ b/db/migrate/20190222051615_add_indexes_for_merge_request_diffs_query.rb
@@ -0,0 +1,42 @@
+# frozen_string_literal: true
+
+class AddIndexesForMergeRequestDiffsQuery < ActiveRecord::Migration[5.0]
+ include Gitlab::Database::MigrationHelpers
+
+ DOWNTIME = false
+
+ INDEX_SPECS = [
+ [
+ :merge_request_metrics,
+ :latest_closed_at,
+ { where: 'latest_closed_at IS NOT NULL' }
+ ],
+ [
+ :merge_request_metrics,
+ [:merge_request_id, :merged_at],
+ { where: 'merged_at IS NOT NULL' }
+ ],
+ [
+ :merge_request_diffs,
+ [:merge_request_id, :id],
+ {
+ name: 'index_merge_request_diffs_on_merge_request_id_and_id_partial',
+ where: 'NOT stored_externally OR stored_externally IS NULL'
+ }
+ ]
+ ].freeze
+
+ disable_ddl_transaction!
+
+ def up
+ INDEX_SPECS.each do |spec|
+ add_concurrent_index(*spec)
+ end
+ end
+
+ def down
+ INDEX_SPECS.reverse.each do |spec|
+ remove_concurrent_index(*spec)
+ end
+ end
+end
diff --git a/db/schema.rb b/db/schema.rb
index 0c997f3b8a2..eed9bb266fe 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -1256,6 +1256,7 @@ ActiveRecord::Schema.define(version: 20190322132835) do
t.integer "external_diff_store"
t.boolean "stored_externally"
t.index ["merge_request_id", "id"], name: "index_merge_request_diffs_on_merge_request_id_and_id", using: :btree
+ t.index ["merge_request_id", "id"], name: "index_merge_request_diffs_on_merge_request_id_and_id_partial", where: "((NOT stored_externally) OR (stored_externally IS NULL))", using: :btree
end
create_table "merge_request_metrics", force: :cascade do |t|
@@ -1271,7 +1272,9 @@ ActiveRecord::Schema.define(version: 20190322132835) do
t.integer "latest_closed_by_id"
t.datetime_with_timezone "latest_closed_at"
t.index ["first_deployed_to_production_at"], name: "index_merge_request_metrics_on_first_deployed_to_production_at", using: :btree
+ t.index ["latest_closed_at"], name: "index_merge_request_metrics_on_latest_closed_at", where: "(latest_closed_at IS NOT NULL)", using: :btree
t.index ["latest_closed_by_id"], name: "index_merge_request_metrics_on_latest_closed_by_id", using: :btree
+ t.index ["merge_request_id", "merged_at"], name: "index_merge_request_metrics_on_merge_request_id_and_merged_at", where: "(merged_at IS NOT NULL)", using: :btree
t.index ["merge_request_id"], name: "index_merge_request_metrics", using: :btree
t.index ["merged_by_id"], name: "index_merge_request_metrics_on_merged_by_id", using: :btree
t.index ["pipeline_id"], name: "index_merge_request_metrics_on_pipeline_id", using: :btree
diff --git a/doc/administration/merge_request_diffs.md b/doc/administration/merge_request_diffs.md
index c34a9519ace..5e9ba4f640f 100644
--- a/doc/administration/merge_request_diffs.md
+++ b/doc/administration/merge_request_diffs.md
@@ -145,3 +145,47 @@ The connection settings match those provided by [Fog](https://github.com/fog), a
```
1. Save the file and [restart GitLab](restart_gitlab.md#installations-from-source) for the changes to take effect.
+
+### Alternative in-database storage
+
+Enabling external diffs may reduce the performance of merge requests, as they
+must be retrieved in a separate operation to other data. A compromise may be
+reached by only storing outdated diffs externally, while keeping current diffs
+in the database.
+
+To enable this feature, perform the following steps:
+
+**In Omnibus installations:**
+
+1. Edit `/etc/gitlab/gitlab.rb` and add the following line:
+
+ ```ruby
+ gitlab_rails['external_diffs_when'] = 'outdated'
+ ```
+
+1. Save the file and [reconfigure GitLab](restart_gitlab.md#omnibus-gitlab-reconfigure) for the changes to take effect.
+
+**In installations from source:**
+
+1. Edit `/home/git/gitlab/config/gitlab.yml` and add or amend the following
+ lines:
+
+ ```yaml
+ external_diffs:
+ enabled: true
+ when: outdated
+ ```
+
+1. Save the file and [restart GitLab](restart_gitlab.md#installations-from-source) for the changes to take effect.
+
+With this feature enabled, diffs will initially stored in the database, rather
+than externally. They will be moved to external storage once any of these
+conditions become true:
+
+- A newer version of the merge request diff exists
+- The merge request was merged more than seven days ago
+- The merge request was closed more than seven day ago
+
+These rules strike a balance between space and performance by only storing
+frequently-accessed diffs in the database. Diffs that are less likely to be
+accessed are moved to external storage instead.
diff --git a/spec/factories/merge_requests.rb b/spec/factories/merge_requests.rb
index a73f330a7a9..a1809a26265 100644
--- a/spec/factories/merge_requests.rb
+++ b/spec/factories/merge_requests.rb
@@ -46,10 +46,26 @@ FactoryBot.define do
target_branch "improve/awesome"
end
+ trait :merged_last_month do
+ merged
+
+ after(:build) do |merge_request|
+ merge_request.build_metrics.merged_at = 1.month.ago
+ end
+ end
+
trait :closed do
state :closed
end
+ trait :closed_last_month do
+ closed
+
+ after(:build) do |merge_request|
+ merge_request.build_metrics.latest_closed_at = 1.month.ago
+ end
+ end
+
trait :opened do
state :opened
end
diff --git a/spec/models/merge_request_diff_spec.rb b/spec/models/merge_request_diff_spec.rb
index 53f5307ea0b..0f00ea7e85e 100644
--- a/spec/models/merge_request_diff_spec.rb
+++ b/spec/models/merge_request_diff_spec.rb
@@ -51,7 +51,104 @@ describe MergeRequestDiff do
end
end
- describe '#latest' do
+ describe '.ids_for_external_storage_migration' do
+ set(:merge_request) { create(:merge_request) }
+ set(:outdated) { merge_request.merge_request_diff }
+ set(:latest) { merge_request.create_merge_request_diff }
+
+ set(:closed_mr) { create(:merge_request, :closed_last_month) }
+ let(:closed) { closed_mr.merge_request_diff }
+
+ set(:merged_mr) { create(:merge_request, :merged_last_month) }
+ let(:merged) { merged_mr.merge_request_diff }
+
+ set(:recently_closed_mr) { create(:merge_request, :closed) }
+ let(:closed_recently) { recently_closed_mr.merge_request_diff }
+
+ set(:recently_merged_mr) { create(:merge_request, :merged) }
+ let(:merged_recently) { recently_merged_mr.merge_request_diff }
+
+ before do
+ merge_request.update!(latest_merge_request_diff: latest)
+ end
+
+ subject { described_class.ids_for_external_storage_migration(limit: 1000) }
+
+ context 'external diffs are disabled' do
+ before do
+ stub_external_diffs_setting(enabled: false)
+ end
+
+ it { is_expected.to be_empty }
+ end
+
+ context 'external diffs are misconfigured' do
+ before do
+ stub_external_diffs_setting(enabled: true, when: 'every second tuesday')
+ end
+
+ it { is_expected.to be_empty }
+ end
+
+ context 'external diffs are enabled unconditionally' do
+ before do
+ stub_external_diffs_setting(enabled: true)
+ end
+
+ it { is_expected.to contain_exactly(outdated.id, latest.id, closed.id, merged.id, closed_recently.id, merged_recently.id) }
+ end
+
+ context 'external diffs are enabled for outdated diffs' do
+ before do
+ stub_external_diffs_setting(enabled: true, when: 'outdated')
+ end
+
+ it 'returns records for outdated merge request versions' do
+ is_expected.to contain_exactly(outdated.id, closed.id, merged.id)
+ end
+ end
+
+ context 'with limit' do
+ it 'respects the limit' do
+ stub_external_diffs_setting(enabled: true)
+
+ expect(described_class.ids_for_external_storage_migration(limit: 3).count).to eq(3)
+ end
+ end
+ end
+
+ describe '#migrate_files_to_external_storage!' do
+ let(:diff) { create(:merge_request).merge_request_diff }
+
+ it 'converts from in-database to external storage' do
+ expect(diff).not_to be_stored_externally
+
+ stub_external_diffs_setting(enabled: true)
+ expect(diff).to receive(:save!)
+
+ diff.migrate_files_to_external_storage!
+
+ expect(diff).to be_stored_externally
+ end
+
+ it 'does nothing with an external diff' do
+ stub_external_diffs_setting(enabled: true)
+
+ expect(diff).to be_stored_externally
+ expect(diff).not_to receive(:save!)
+
+ diff.migrate_files_to_external_storage!
+ end
+
+ it 'does nothing if external diffs are disabled' do
+ expect(diff).not_to be_stored_externally
+ expect(diff).not_to receive(:save!)
+
+ diff.migrate_files_to_external_storage!
+ end
+ end
+
+ describe '#latest?' do
let!(:mr) { create(:merge_request, :with_diffs) }
let!(:first_diff) { mr.merge_request_diff }
let!(:last_diff) { mr.create_merge_request_diff }
@@ -222,14 +319,58 @@ describe MergeRequestDiff do
include_examples 'merge request diffs'
end
- describe 'external diffs configured' do
+ describe 'external diffs always enabled' do
before do
- stub_external_diffs_setting(enabled: true)
+ stub_external_diffs_setting(enabled: true, when: 'always')
end
include_examples 'merge request diffs'
end
+ describe 'exernal diffs enabled for outdated diffs' do
+ before do
+ stub_external_diffs_setting(enabled: true, when: 'outdated')
+ end
+
+ include_examples 'merge request diffs'
+
+ it 'stores up-to-date diffs in the database' do
+ expect(diff).not_to be_stored_externally
+ end
+
+ it 'stores diffs for recently closed MRs in the database' do
+ mr = create(:merge_request, :closed)
+
+ expect(mr.merge_request_diff).not_to be_stored_externally
+ end
+
+ it 'stores diffs for recently merged MRs in the database' do
+ mr = create(:merge_request, :merged)
+
+ expect(mr.merge_request_diff).not_to be_stored_externally
+ end
+
+ it 'stores diffs for old MR versions in external storage' do
+ old_diff = diff
+ merge_request.create_merge_request_diff
+ old_diff.migrate_files_to_external_storage!
+
+ expect(old_diff).to be_stored_externally
+ end
+
+ it 'stores diffs for old closed MRs in external storage' do
+ mr = create(:merge_request, :closed_last_month)
+
+ expect(mr.merge_request_diff).to be_stored_externally
+ end
+
+ it 'stores diffs for old merged MRs in external storage' do
+ mr = create(:merge_request, :merged_last_month)
+
+ expect(mr.merge_request_diff).to be_stored_externally
+ end
+ end
+
describe '#commit_shas' do
it 'returns all commit SHAs using commits from the DB' do
expect(diff_with_commits.commit_shas).not_to be_empty
diff --git a/spec/services/merge_requests/migrate_external_diffs_service_spec.rb b/spec/services/merge_requests/migrate_external_diffs_service_spec.rb
new file mode 100644
index 00000000000..40ac747e66f
--- /dev/null
+++ b/spec/services/merge_requests/migrate_external_diffs_service_spec.rb
@@ -0,0 +1,43 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+describe MergeRequests::MigrateExternalDiffsService do
+ let(:merge_request) { create(:merge_request) }
+ let(:diff) { merge_request.merge_request_diff }
+
+ describe '.enqueue!', :sidekiq do
+ around do |example|
+ Sidekiq::Testing.fake! { example.run }
+ end
+
+ it 'enqueues nothing if external diffs are disabled' do
+ expect(diff).not_to be_stored_externally
+
+ expect { described_class.enqueue! }
+ .not_to change { MigrateExternalDiffsWorker.jobs.count }
+ end
+
+ it 'enqueues eligible in-database diffs if external diffs are enabled' do
+ expect(diff).not_to be_stored_externally
+
+ stub_external_diffs_setting(enabled: true)
+
+ expect { described_class.enqueue! }
+ .to change { MigrateExternalDiffsWorker.jobs.count }
+ .by(1)
+ end
+ end
+
+ describe '#execute' do
+ it 'migrates an in-database diff to the external store' do
+ expect(diff).not_to be_stored_externally
+
+ stub_external_diffs_setting(enabled: true)
+
+ described_class.new(diff).execute
+
+ expect(diff).to be_stored_externally
+ end
+ end
+end
diff --git a/spec/workers/migrate_external_diffs_worker_spec.rb b/spec/workers/migrate_external_diffs_worker_spec.rb
new file mode 100644
index 00000000000..88d48cad14b
--- /dev/null
+++ b/spec/workers/migrate_external_diffs_worker_spec.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+describe MigrateExternalDiffsWorker do
+ let(:worker) { described_class.new }
+ let(:diff) { create(:merge_request).merge_request_diff }
+
+ describe '#perform' do
+ it 'migrates the listed diff' do
+ expect_next_instance_of(MergeRequests::MigrateExternalDiffsService) do |instance|
+ expect(instance.diff).to eq(diff)
+ expect(instance).to receive(:execute)
+ end
+
+ worker.perform(diff.id)
+ end
+
+ it 'does nothing if the diff is missing' do
+ diff.destroy
+
+ worker.perform(diff.id)
+ end
+ end
+end
diff --git a/spec/workers/schedule_migrate_external_diffs_worker_spec.rb b/spec/workers/schedule_migrate_external_diffs_worker_spec.rb
new file mode 100644
index 00000000000..9d6fecc9f4e
--- /dev/null
+++ b/spec/workers/schedule_migrate_external_diffs_worker_spec.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+describe ScheduleMigrateExternalDiffsWorker do
+ include ExclusiveLeaseHelpers
+
+ let(:worker) { described_class.new }
+
+ describe '#perform' do
+ it 'triggers a scan for diffs to migrate' do
+ expect(MergeRequests::MigrateExternalDiffsService).to receive(:enqueue!)
+
+ worker.perform
+ end
+
+ it 'will not run if the lease is already taken' do
+ stub_exclusive_lease_taken('schedule_migrate_external_diffs_worker', timeout: 2.hours)
+
+ expect(MergeRequests::MigrateExternalDiffsService).not_to receive(:enqueue!)
+
+ worker.perform
+ end
+ end
+end