summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorOswaldo Ferreira <oswaldo@gitlab.com>2018-07-06 12:23:51 -0300
committerOswaldo Ferreira <oswaldo@gitlab.com>2018-07-10 09:43:58 -0300
commitdb0f150516404c6e7951bc961eab45be11b5fcf5 (patch)
tree74792807be2e5b7d44740ee50ddcdd57e6e9c180 /lib
parent19966e7095fd6357183afb8e009c7f8e78a05591 (diff)
downloadgitlab-ce-db0f150516404c6e7951bc961eab45be11b5fcf5.tar.gz
Reschedule DeleteDiffFiles until there is none left to remove
Diffstat (limited to 'lib')
-rw-r--r--lib/gitlab/background_migration/delete_diff_files.rb63
1 files changed, 36 insertions, 27 deletions
diff --git a/lib/gitlab/background_migration/delete_diff_files.rb b/lib/gitlab/background_migration/delete_diff_files.rb
index 36161285aac..2043b1649dc 100644
--- a/lib/gitlab/background_migration/delete_diff_files.rb
+++ b/lib/gitlab/background_migration/delete_diff_files.rb
@@ -19,51 +19,60 @@ module Gitlab
include EachBatch
end
- BATCH = 5_000
+ DIFF_ROWS_LIMIT = 5_000
DEAD_TUPLES_THRESHOLD = 50_000
VACUUM_WAIT_TIME = 5.minutes
def perform
- diffs_with_files = MergeRequestDiff
- .joins(:merge_request)
- .where("merge_requests.state = 'merged'")
- .where('merge_requests.latest_merge_request_diff_id IS NOT NULL')
- .where('merge_requests.latest_merge_request_diff_id != merge_request_diffs.id')
- .where("merge_request_diffs.state NOT IN ('without_files', 'empty')")
-
- diffs_with_files.each_batch(of: BATCH) do |batch, index|
- wait_deadtuple_vacuum(index)
- prune_diff_files(batch, index)
+ rescheduling do
+ prune_diff_files(diffs_collection.limit(DIFF_ROWS_LIMIT))
end
end
- def wait_deadtuple_vacuum(index)
- db_klass = Gitlab::Database
+ def should_wait_deadtuple_vacuum?
+ return false unless Gitlab::Database.postgresql?
- if defined?(db_klass) && db_klass.respond_to?(:postgresql?) && db_klass.postgresql?
- while diff_files_dead_tuples_count >= DEAD_TUPLES_THRESHOLD
- log_info("Dead tuple threshold hit on merge_request_diff_files (#{index}th batch): " \
- "#{diff_files_dead_tuples_count}, waiting 5 minutes")
- sleep VACUUM_WAIT_TIME
- end
- end
+ diff_files_dead_tuples_count >= DEAD_TUPLES_THRESHOLD
end
private
+ def rescheduling(&block)
+ # We should reschedule until deadtuples get in a desirable
+ # state (e.g. < 50_000). That may take move than one reschedule.
+ #
+ if should_wait_deadtuple_vacuum?
+ reschedule
+ return
+ end
+
+ block.call
+
+ reschedule if diffs_collection.limit(1).count > 0
+ end
+
+ def reschedule
+ BackgroundMigrationWorker.perform_in(VACUUM_WAIT_TIME, self.class.name.demodulize)
+ end
+
+ def diffs_collection
+ MergeRequestDiff
+ .joins(:merge_request)
+ .where("merge_requests.state = 'merged'")
+ .where('merge_requests.latest_merge_request_diff_id IS NOT NULL')
+ .where('merge_requests.latest_merge_request_diff_id != merge_request_diffs.id')
+ .where("merge_request_diffs.state NOT IN ('without_files', 'empty')")
+ end
+
def diff_files_dead_tuples_count
dead_tuple =
execute_statement("SELECT n_dead_tup FROM pg_stat_all_tables "\
"WHERE relname = 'merge_request_diff_files'")[0]
- if dead_tuple.present?
- dead_tuple['n_dead_tup'].to_i
- else
- 0
- end
+ dead_tuple&.fetch('n_dead_tup', 0).to_i
end
- def prune_diff_files(batch, index)
+ def prune_diff_files(batch)
diff_ids = batch.pluck(:id)
removed = 0
@@ -76,7 +85,7 @@ module Gitlab
.delete_all
end
- log_info("#{index}th batch - Removed #{removed} merge_request_diff_files rows, "\
+ log_info("Removed #{removed} merge_request_diff_files rows, "\
"updated #{updated} merge_request_diffs rows")
end