summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYorick Peterse <yorickpeterse@gmail.com>2018-08-17 14:33:27 +0000
committerYorick Peterse <yorickpeterse@gmail.com>2018-08-17 14:33:27 +0000
commita58d0a0182d06efbbde57821e71c305518325d6f (patch)
tree4caf920f90d558f57fd2e858ecb709bc3fa177de
parentc2f14af75e9b65ae8d88d3d025ea63da7bd2106b (diff)
parent729de4f1ba04f1a9a44b7f3f3b9bd5fb9165e4ca (diff)
downloadgitlab-ce-a58d0a0182d06efbbde57821e71c305518325d6f.tar.gz
Merge branch 'ab-49754-gh-importer-internal-ids' into 'master'
GitHub importer: Keep track of internal_ids Closes #49754 See merge request gitlab-org/gitlab-ce!20926
-rw-r--r--app/models/internal_id.rb6
-rw-r--r--changelogs/unreleased/ab-49446-internal-ids-inconsistency.yml5
-rw-r--r--db/post_migrate/20180723130817_delete_inconsistent_internal_id_records.rb47
-rw-r--r--lib/gitlab/github_import/bulk_importing.rb4
-rw-r--r--lib/gitlab/github_import/importer/issue_importer.rb6
-rw-r--r--lib/gitlab/github_import/importer/milestones_importer.rb12
-rw-r--r--lib/gitlab/github_import/importer/pull_request_importer.rb8
-rw-r--r--spec/lib/gitlab/github_import/bulk_importing_spec.rb12
-rw-r--r--spec/lib/gitlab/github_import/importer/issue_importer_spec.rb22
-rw-r--r--spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb14
-rw-r--r--spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb10
-rw-r--r--spec/migrations/delete_inconsistent_internal_id_records_spec.rb119
12 files changed, 257 insertions, 8 deletions
diff --git a/app/models/internal_id.rb b/app/models/internal_id.rb
index 4eb211eff61..e7168d49db9 100644
--- a/app/models/internal_id.rb
+++ b/app/models/internal_id.rb
@@ -111,7 +111,7 @@ class InternalId < ActiveRecord::Base
# Generates next internal id and returns it
def generate
- subject.transaction do
+ InternalId.transaction do
# Create a record in internal_ids if one does not yet exist
# and increment its last value
#
@@ -125,7 +125,7 @@ class InternalId < ActiveRecord::Base
#
# Note this will acquire a ROW SHARE lock on the InternalId record
def track_greatest(new_value)
- subject.transaction do
+ InternalId.transaction do
(lookup || create_record).track_greatest_and_save!(new_value)
end
end
@@ -148,7 +148,7 @@ class InternalId < ActiveRecord::Base
# violation. We can safely roll-back the nested transaction and perform
# a lookup instead to retrieve the record.
def create_record
- subject.transaction(requires_new: true) do
+ InternalId.transaction(requires_new: true) do
InternalId.create!(
**scope,
usage: usage_value,
diff --git a/changelogs/unreleased/ab-49446-internal-ids-inconsistency.yml b/changelogs/unreleased/ab-49446-internal-ids-inconsistency.yml
new file mode 100644
index 00000000000..bfea57d79e0
--- /dev/null
+++ b/changelogs/unreleased/ab-49446-internal-ids-inconsistency.yml
@@ -0,0 +1,5 @@
+---
+title: Add migration to cleanup internal_ids inconsistency.
+merge_request: 20926
+author:
+type: fixed
diff --git a/db/post_migrate/20180723130817_delete_inconsistent_internal_id_records.rb b/db/post_migrate/20180723130817_delete_inconsistent_internal_id_records.rb
new file mode 100644
index 00000000000..3b9b95ec9ca
--- /dev/null
+++ b/db/post_migrate/20180723130817_delete_inconsistent_internal_id_records.rb
@@ -0,0 +1,47 @@
+# frozen_string_literal: true
+class DeleteInconsistentInternalIdRecords < ActiveRecord::Migration
+ include Gitlab::Database::MigrationHelpers
+
+ DOWNTIME = false
+
+ disable_ddl_transaction!
+
+ # This migration cleans up any inconsistent records in internal_ids.
+ #
+ # That is, it deletes records that track a `last_value` that is
+ # smaller than the maximum internal id (usually `iid`) found in
+ # the corresponding model records.
+
+ def up
+ disable_statement_timeout do
+ delete_internal_id_records('issues', 'project_id')
+ delete_internal_id_records('merge_requests', 'project_id', 'target_project_id')
+ delete_internal_id_records('deployments', 'project_id')
+ delete_internal_id_records('milestones', 'project_id')
+ delete_internal_id_records('milestones', 'namespace_id', 'group_id')
+ delete_internal_id_records('ci_pipelines', 'project_id')
+ end
+ end
+
+ class InternalId < ActiveRecord::Base
+ self.table_name = 'internal_ids'
+ enum usage: { issues: 0, merge_requests: 1, deployments: 2, milestones: 3, epics: 4, ci_pipelines: 5 }
+ end
+
+ private
+
+ def delete_internal_id_records(base_table, scope_column_name, base_scope_column_name = scope_column_name)
+ sql = <<~SQL
+ SELECT id FROM ( -- workaround for MySQL
+ SELECT internal_ids.id FROM (
+ SELECT #{base_scope_column_name} AS #{scope_column_name}, max(iid) as maximum_iid from #{base_table} GROUP BY #{scope_column_name}
+ ) maxima JOIN internal_ids USING (#{scope_column_name})
+ WHERE internal_ids.usage=#{InternalId.usages.fetch(base_table)} AND maxima.maximum_iid > internal_ids.last_value
+ ) internal_ids
+ SQL
+
+ InternalId.where("id IN (#{sql})").tap do |ids| # rubocop:disable GitlabSecurity/SqlInjection
+ say "Deleting internal_id records for #{base_table}: #{ids.pluck(:project_id, :last_value)}" unless ids.empty?
+ end.delete_all
+ end
+end
diff --git a/lib/gitlab/github_import/bulk_importing.rb b/lib/gitlab/github_import/bulk_importing.rb
index 147597289cf..da2f96b5c4b 100644
--- a/lib/gitlab/github_import/bulk_importing.rb
+++ b/lib/gitlab/github_import/bulk_importing.rb
@@ -15,10 +15,12 @@ module Gitlab
end
# Bulk inserts the given rows into the database.
- def bulk_insert(model, rows, batch_size: 100)
+ def bulk_insert(model, rows, batch_size: 100, pre_hook: nil)
rows.each_slice(batch_size) do |slice|
+ pre_hook.call(slice) if pre_hook
Gitlab::Database.bulk_insert(model.table_name, slice)
end
+ rows
end
end
end
diff --git a/lib/gitlab/github_import/importer/issue_importer.rb b/lib/gitlab/github_import/importer/issue_importer.rb
index 31fefebf787..ead4215810f 100644
--- a/lib/gitlab/github_import/importer/issue_importer.rb
+++ b/lib/gitlab/github_import/importer/issue_importer.rb
@@ -55,7 +55,11 @@ module Gitlab
updated_at: issue.updated_at
}
- GithubImport.insert_and_return_id(attributes, project.issues)
+ GithubImport.insert_and_return_id(attributes, project.issues).tap do |id|
+ # We use .insert_and_return_id which effectively disables all callbacks.
+ # Trigger iid logic here to make sure we track internal id values consistently.
+ project.issues.find(id).ensure_project_iid!
+ end
rescue ActiveRecord::InvalidForeignKey
# It's possible the project has been deleted since scheduling this
# job. In this case we'll just skip creating the issue.
diff --git a/lib/gitlab/github_import/importer/milestones_importer.rb b/lib/gitlab/github_import/importer/milestones_importer.rb
index c53480e828a..94eb9136b9a 100644
--- a/lib/gitlab/github_import/importer/milestones_importer.rb
+++ b/lib/gitlab/github_import/importer/milestones_importer.rb
@@ -17,10 +17,20 @@ module Gitlab
end
def execute
- bulk_insert(Milestone, build_milestones)
+ # We insert records in bulk, by-passing any standard model callbacks.
+ # The pre_hook here makes sure we track internal ids consistently.
+ # Note this has to be called before performing an insert of a batch
+ # because we're outside a transaction scope here.
+ bulk_insert(Milestone, build_milestones, pre_hook: method(:track_greatest_iid))
build_milestones_cache
end
+ def track_greatest_iid(slice)
+ greatest_iid = slice.max { |e| e[:iid] }[:iid]
+
+ InternalId.track_greatest(nil, { project: project }, :milestones, greatest_iid, ->(_) { project.milestones.maximum(:iid) })
+ end
+
def build_milestones
build_database_rows(each_milestone)
end
diff --git a/lib/gitlab/github_import/importer/pull_request_importer.rb b/lib/gitlab/github_import/importer/pull_request_importer.rb
index 6b3688c4381..e4b49d2143a 100644
--- a/lib/gitlab/github_import/importer/pull_request_importer.rb
+++ b/lib/gitlab/github_import/importer/pull_request_importer.rb
@@ -76,7 +76,13 @@ module Gitlab
merge_request_id = GithubImport
.insert_and_return_id(attributes, project.merge_requests)
- [project.merge_requests.find(merge_request_id), false]
+ merge_request = project.merge_requests.find(merge_request_id)
+
+ # We use .insert_and_return_id which effectively disables all callbacks.
+ # Trigger iid logic here to make sure we track internal id values consistently.
+ merge_request.ensure_target_project_iid!
+
+ [merge_request, false]
end
rescue ActiveRecord::InvalidForeignKey
# It's possible the project has been deleted since scheduling this
diff --git a/spec/lib/gitlab/github_import/bulk_importing_spec.rb b/spec/lib/gitlab/github_import/bulk_importing_spec.rb
index 91229d9c7d4..861710f7e9b 100644
--- a/spec/lib/gitlab/github_import/bulk_importing_spec.rb
+++ b/spec/lib/gitlab/github_import/bulk_importing_spec.rb
@@ -58,5 +58,17 @@ describe Gitlab::GithubImport::BulkImporting do
importer.bulk_insert(model, rows, batch_size: 5)
end
+
+ it 'calls pre_hook for each slice if given' do
+ rows = [{ title: 'Foo' }] * 10
+ model = double(:model, table_name: 'kittens')
+ pre_hook = double('pre_hook', call: nil)
+ allow(Gitlab::Database).to receive(:bulk_insert)
+
+ expect(pre_hook).to receive(:call).with(rows[0..4])
+ expect(pre_hook).to receive(:call).with(rows[5..9])
+
+ importer.bulk_insert(model, rows, batch_size: 5, pre_hook: pre_hook)
+ end
end
end
diff --git a/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb b/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb
index 81fe97c1e49..3f7a12144d5 100644
--- a/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb
+++ b/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb
@@ -78,6 +78,11 @@ describe Gitlab::GithubImport::Importer::IssueImporter, :clean_gitlab_redis_cach
.to receive(:id_for)
.with(issue)
.and_return(milestone.id)
+
+ allow(importer.user_finder)
+ .to receive(:author_id_for)
+ .with(issue)
+ .and_return([user.id, true])
end
context 'when the issue author could be found' do
@@ -172,6 +177,23 @@ describe Gitlab::GithubImport::Importer::IssueImporter, :clean_gitlab_redis_cach
expect(importer.create_issue).to be_a_kind_of(Numeric)
end
+
+ it 'triggers internal_id functionality to track greatest iids' do
+ allow(importer.user_finder)
+ .to receive(:author_id_for)
+ .with(issue)
+ .and_return([user.id, true])
+
+ issue = build_stubbed(:issue, project: project)
+ allow(Gitlab::GithubImport)
+ .to receive(:insert_and_return_id)
+ .and_return(issue.id)
+ allow(project.issues).to receive(:find).with(issue.id).and_return(issue)
+
+ expect(issue).to receive(:ensure_project_iid!)
+
+ importer.create_issue
+ end
end
describe '#create_assignees' do
diff --git a/spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb b/spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb
index b1cac3b6e46..db0be760c7b 100644
--- a/spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb
+++ b/spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb
@@ -29,13 +29,25 @@ describe Gitlab::GithubImport::Importer::MilestonesImporter, :clean_gitlab_redis
expect(importer)
.to receive(:bulk_insert)
- .with(Milestone, [milestone_hash])
+ .with(Milestone, [milestone_hash], any_args)
expect(importer)
.to receive(:build_milestones_cache)
importer.execute
end
+
+ it 'tracks internal ids' do
+ milestone_hash = { iid: 1, title: '1.0', project_id: project.id }
+ allow(importer)
+ .to receive(:build_milestones)
+ .and_return([milestone_hash])
+
+ expect(InternalId).to receive(:track_greatest)
+ .with(nil, { project: project }, :milestones, 1, any_args)
+
+ importer.execute
+ end
end
describe '#build_milestones' do
diff --git a/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb b/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb
index 3422a1e82fc..44c920043b4 100644
--- a/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb
+++ b/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb
@@ -111,6 +111,16 @@ describe Gitlab::GithubImport::Importer::PullRequestImporter, :clean_gitlab_redi
expect(mr).to be_instance_of(MergeRequest)
expect(exists).to eq(false)
end
+
+ it 'triggers internal_id functionality to track greatest iids' do
+ mr = build_stubbed(:merge_request, source_project: project, target_project: project)
+ allow(Gitlab::GithubImport).to receive(:insert_and_return_id).and_return(mr.id)
+ allow(project.merge_requests).to receive(:find).with(mr.id).and_return(mr)
+
+ expect(mr).to receive(:ensure_target_project_iid!)
+
+ importer.create_merge_request
+ end
end
context 'when the author could not be found' do
diff --git a/spec/migrations/delete_inconsistent_internal_id_records_spec.rb b/spec/migrations/delete_inconsistent_internal_id_records_spec.rb
new file mode 100644
index 00000000000..becb71cf427
--- /dev/null
+++ b/spec/migrations/delete_inconsistent_internal_id_records_spec.rb
@@ -0,0 +1,119 @@
+# frozen_string_literal: true
+# rubocop:disable RSpec/FactoriesInMigrationSpecs
+require 'spec_helper'
+require Rails.root.join('db', 'post_migrate', '20180723130817_delete_inconsistent_internal_id_records.rb')
+
+describe DeleteInconsistentInternalIdRecords, :migration do
+ let!(:project1) { create(:project) }
+ let!(:project2) { create(:project) }
+ let!(:project3) { create(:project) }
+
+ let(:internal_id_query) { ->(project) { InternalId.where(usage: InternalId.usages[scope.to_s.tableize], project: project) } }
+
+ let(:create_models) do
+ 3.times { create(scope, project: project1) }
+ 3.times { create(scope, project: project2) }
+ 3.times { create(scope, project: project3) }
+ end
+
+ shared_examples_for 'deleting inconsistent internal_id records' do
+ before do
+ create_models
+
+ internal_id_query.call(project1).first.tap do |iid|
+ iid.last_value = iid.last_value - 2
+ # This is an inconsistent record
+ iid.save!
+ end
+
+ internal_id_query.call(project3).first.tap do |iid|
+ iid.last_value = iid.last_value + 2
+ # This is a consistent record
+ iid.save!
+ end
+ end
+
+ it "deletes inconsistent issues" do
+ expect { migrate! }.to change { internal_id_query.call(project1).size }.from(1).to(0)
+ end
+
+ it "retains consistent issues" do
+ expect { migrate! }.not_to change { internal_id_query.call(project2).size }
+ end
+
+ it "retains consistent records, especially those with a greater last_value" do
+ expect { migrate! }.not_to change { internal_id_query.call(project3).size }
+ end
+ end
+
+ context 'for issues' do
+ let(:scope) { :issue }
+ it_behaves_like 'deleting inconsistent internal_id records'
+ end
+
+ context 'for merge_requests' do
+ let(:scope) { :merge_request }
+
+ let(:create_models) do
+ 3.times { |i| create(scope, target_project: project1, source_project: project1, source_branch: i.to_s) }
+ 3.times { |i| create(scope, target_project: project2, source_project: project2, source_branch: i.to_s) }
+ 3.times { |i| create(scope, target_project: project3, source_project: project3, source_branch: i.to_s) }
+ end
+
+ it_behaves_like 'deleting inconsistent internal_id records'
+ end
+
+ context 'for deployments' do
+ let(:scope) { :deployment }
+ it_behaves_like 'deleting inconsistent internal_id records'
+ end
+
+ context 'for milestones (by project)' do
+ let(:scope) { :milestone }
+ it_behaves_like 'deleting inconsistent internal_id records'
+ end
+
+ context 'for ci_pipelines' do
+ let(:scope) { :ci_pipeline }
+ it_behaves_like 'deleting inconsistent internal_id records'
+ end
+
+ context 'for milestones (by group)' do
+ # milestones (by group) is a little different than all of the other models
+ let!(:group1) { create(:group) }
+ let!(:group2) { create(:group) }
+ let!(:group3) { create(:group) }
+
+ let(:internal_id_query) { ->(group) { InternalId.where(usage: InternalId.usages['milestones'], namespace: group) } }
+
+ before do
+ 3.times { create(:milestone, group: group1) }
+ 3.times { create(:milestone, group: group2) }
+ 3.times { create(:milestone, group: group3) }
+
+ internal_id_query.call(group1).first.tap do |iid|
+ iid.last_value = iid.last_value - 2
+ # This is an inconsistent record
+ iid.save!
+ end
+
+ internal_id_query.call(group3).first.tap do |iid|
+ iid.last_value = iid.last_value + 2
+ # This is a consistent record
+ iid.save!
+ end
+ end
+
+ it "deletes inconsistent issues" do
+ expect { migrate! }.to change { internal_id_query.call(group1).size }.from(1).to(0)
+ end
+
+ it "retains consistent issues" do
+ expect { migrate! }.not_to change { internal_id_query.call(group2).size }
+ end
+
+ it "retains consistent records, especially those with a greater last_value" do
+ expect { migrate! }.not_to change { internal_id_query.call(group3).size }
+ end
+ end
+end