summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStan Hu <stanhu@gmail.com>2018-11-04 05:59:13 -0800
committerStan Hu <stanhu@gmail.com>2018-11-04 06:01:17 -0800
commit5c8ce94052fab07c979f6b1ecef9f7b807909b94 (patch)
treeffe0a839038dc2accdae13a7fc6d7b763861f2b4
parent4d3ff28a6a0d81f44ccb3eb1602996e5d7c3de1c (diff)
downloadgitlab-ce-5c8ce94052fab07c979f6b1ecef9f7b807909b94.tar.gz
Fix statement timeouts in RemoveRestrictedTodos migration
On GitLab.com, the RemoveRestrictedTodos background migration encountered about 700+ failures a day due to statement timeouts. PostgreSQL might perform badly with a LIMIT 1 because the planner is guessing that scanning the index in ID order will come across the desired row in less time it will take the planner than using another index. The order_hint does not affect the search results. For example, `ORDER BY id ASC, updated_at ASC` means the same thing as `ORDER BY id ASC`. Closes https://gitlab.com/gitlab-org/gitlab-ce/issues/52649
-rw-r--r--app/models/concerns/each_batch.rb17
-rw-r--r--changelogs/unreleased/sh-fix-issue-52649.yml5
-rw-r--r--lib/gitlab/background_migration/remove_restricted_todos.rb2
-rw-r--r--spec/models/concerns/each_batch_spec.rb51
4 files changed, 49 insertions, 26 deletions
diff --git a/app/models/concerns/each_batch.rb b/app/models/concerns/each_batch.rb
index 8cf0b8b154d..6314b46a7e3 100644
--- a/app/models/concerns/each_batch.rb
+++ b/app/models/concerns/each_batch.rb
@@ -39,7 +39,15 @@ module EachBatch
#
# of - The number of rows to retrieve per batch.
# column - The column to use for ordering the batches.
- def each_batch(of: 1000, column: primary_key)
+ # order_hint - An optional column to append to the `ORDER BY id`
+ # clause to help the query planner. PostgreSQL might perform badly
+ # with a LIMIT 1 because the planner is guessing that scanning the
+ # index in ID order will come across the desired row in less time
+ # it will take the planner than using another index. The
+ # order_hint does not affect the search results. For example,
+ # `ORDER BY id ASC, updated_at ASC` means the same thing as `ORDER
+ # BY id ASC`.
+ def each_batch(of: 1000, column: primary_key, order_hint: nil)
unless column
raise ArgumentError,
'the column: argument must be set to a column name to use for ordering rows'
@@ -48,7 +56,9 @@ module EachBatch
start = except(:select)
.select(column)
.reorder(column => :asc)
- .take
+
+ start = start.order(order_hint) if order_hint
+ start = start.take
return unless start
@@ -60,6 +70,9 @@ module EachBatch
.select(column)
.where(arel_table[column].gteq(start_id))
.reorder(column => :asc)
+
+ stop = stop.order(order_hint) if order_hint
+ stop = stop
.offset(of)
.limit(1)
.take
diff --git a/changelogs/unreleased/sh-fix-issue-52649.yml b/changelogs/unreleased/sh-fix-issue-52649.yml
new file mode 100644
index 00000000000..34b7f74a345
--- /dev/null
+++ b/changelogs/unreleased/sh-fix-issue-52649.yml
@@ -0,0 +1,5 @@
+---
+title: Fix statement timeouts in RemoveRestrictedTodos migration
+merge_request: 22795
+author:
+type: other
diff --git a/lib/gitlab/background_migration/remove_restricted_todos.rb b/lib/gitlab/background_migration/remove_restricted_todos.rb
index 9941c2fe6d9..47579d46c1b 100644
--- a/lib/gitlab/background_migration/remove_restricted_todos.rb
+++ b/lib/gitlab/background_migration/remove_restricted_todos.rb
@@ -67,7 +67,7 @@ module Gitlab
.where('access_level >= ?', 20)
confidential_issues = Issue.select(:id, :author_id).where(confidential: true, project_id: project_id)
- confidential_issues.each_batch(of: 100) do |batch|
+ confidential_issues.each_batch(of: 100, order_hint: :confidential) do |batch|
batch.each do |issue|
assigned_users = IssueAssignee.select(:user_id).where(issue_id: issue.id)
diff --git a/spec/models/concerns/each_batch_spec.rb b/spec/models/concerns/each_batch_spec.rb
index 951690a217b..17224c09693 100644
--- a/spec/models/concerns/each_batch_spec.rb
+++ b/spec/models/concerns/each_batch_spec.rb
@@ -14,40 +14,45 @@ describe EachBatch do
5.times { create(:user, updated_at: 1.day.ago) }
end
- it 'yields an ActiveRecord::Relation when a block is given' do
- model.each_batch do |relation|
- expect(relation).to be_a_kind_of(ActiveRecord::Relation)
+ shared_examples 'each_batch handling' do |kwargs|
+ it 'yields an ActiveRecord::Relation when a block is given' do
+ model.each_batch(kwargs) do |relation|
+ expect(relation).to be_a_kind_of(ActiveRecord::Relation)
+ end
end
- end
- it 'yields a batch index as the second argument' do
- model.each_batch do |_, index|
- expect(index).to eq(1)
+ it 'yields a batch index as the second argument' do
+ model.each_batch(kwargs) do |_, index|
+ expect(index).to eq(1)
+ end
end
- end
- it 'accepts a custom batch size' do
- amount = 0
+ it 'accepts a custom batch size' do
+ amount = 0
- model.each_batch(of: 1) { amount += 1 }
+ model.each_batch(kwargs.merge({ of: 1 })) { amount += 1 }
- expect(amount).to eq(5)
- end
+ expect(amount).to eq(5)
+ end
- it 'does not include ORDER BYs in the yielded relations' do
- model.each_batch do |relation|
- expect(relation.to_sql).not_to include('ORDER BY')
+ it 'does not include ORDER BYs in the yielded relations' do
+ model.each_batch do |relation|
+ expect(relation.to_sql).not_to include('ORDER BY')
+ end
end
- end
- it 'allows updating of the yielded relations' do
- time = Time.now
+ it 'allows updating of the yielded relations' do
+ time = Time.now
- model.each_batch do |relation|
- relation.update_all(updated_at: time)
- end
+ model.each_batch do |relation|
+ relation.update_all(updated_at: time)
+ end
- expect(model.where(updated_at: time).count).to eq(5)
+ expect(model.where(updated_at: time).count).to eq(5)
+ end
end
+
+ it_behaves_like 'each_batch handling', {}
+ it_behaves_like 'each_batch handling', { order_hint: :updated_at }
end
end