summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYorick Peterse <yorickpeterse@gmail.com>2019-01-29 16:50:18 +0000
committerYorick Peterse <yorickpeterse@gmail.com>2019-01-29 16:50:18 +0000
commit2d24dca485f57f3f7ecf0da0cbf07a566d90a98c (patch)
tree9b9e0ca4fd28ef590e882560144c8a68281b5952
parent3a9c9e61c9dff92e76a7fef68c8b4e6a974f3ee6 (diff)
parentf82eea8628770ce8451bdaf4819cd3879c163853 (diff)
downloadgitlab-ce-2d24dca485f57f3f7ecf0da0cbf07a566d90a98c.tar.gz
Merge branch 'ab-54270-github-iid' into 'master'
Reduce amount of locks needed for GitHub importer Closes #54270 and #51817 See merge request gitlab-org/gitlab-ce!24102
-rw-r--r--app/models/internal_id.rb17
-rw-r--r--app/models/project.rb7
-rw-r--r--changelogs/unreleased/ab-54270-github-iid.yml5
-rw-r--r--db/post_migrate/20190102152410_delete_inconsistent_internal_id_records2.rb43
-rw-r--r--lib/gitlab/github_import/bulk_importing.rb4
-rw-r--r--lib/gitlab/github_import/importer/issue_importer.rb6
-rw-r--r--lib/gitlab/github_import/importer/milestones_importer.rb12
-rw-r--r--lib/gitlab/import/merge_request_helpers.rb4
-rw-r--r--spec/lib/gitlab/github_import/bulk_importing_spec.rb12
-rw-r--r--spec/lib/gitlab/github_import/importer/issue_importer_spec.rb22
-rw-r--r--spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb14
-rw-r--r--spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb10
-rw-r--r--spec/models/internal_id_spec.rb23
-rw-r--r--spec/models/project_spec.rb1
14 files changed, 97 insertions, 83 deletions
diff --git a/app/models/internal_id.rb b/app/models/internal_id.rb
index e7168d49db9..e75c6eb2331 100644
--- a/app/models/internal_id.rb
+++ b/app/models/internal_id.rb
@@ -66,6 +66,17 @@ class InternalId < ActiveRecord::Base
InternalIdGenerator.new(subject, scope, usage, init).generate
end
+ # Flushing records is generally safe in a sense that those
+ # records are going to be re-created when needed.
+ #
+ # A filter condition has to be provided to not accidentally flush
+ # records for all projects.
+ def flush_records!(filter)
+ raise ArgumentError, "filter cannot be empty" if filter.blank?
+
+ where(filter).delete_all
+ end
+
def available?
@available_flag ||= ActiveRecord::Migrator.current_version >= REQUIRED_SCHEMA_VERSION # rubocop:disable Gitlab/PredicateMemoization
end
@@ -111,7 +122,7 @@ class InternalId < ActiveRecord::Base
# Generates next internal id and returns it
def generate
- InternalId.transaction do
+ subject.transaction do
# Create a record in internal_ids if one does not yet exist
# and increment its last value
#
@@ -125,7 +136,7 @@ class InternalId < ActiveRecord::Base
#
# Note this will acquire a ROW SHARE lock on the InternalId record
def track_greatest(new_value)
- InternalId.transaction do
+ subject.transaction do
(lookup || create_record).track_greatest_and_save!(new_value)
end
end
@@ -148,7 +159,7 @@ class InternalId < ActiveRecord::Base
# violation. We can safely roll-back the nested transaction and perform
# a lookup instead to retrieve the record.
def create_record
- InternalId.transaction(requires_new: true) do
+ subject.transaction(requires_new: true) do
InternalId.create!(
**scope,
usage: usage_value,
diff --git a/app/models/project.rb b/app/models/project.rb
index b509120ff83..da77479fe1f 100644
--- a/app/models/project.rb
+++ b/app/models/project.rb
@@ -1585,6 +1585,13 @@ class Project < ActiveRecord::Base
def after_import
repository.after_import
wiki.repository.after_import
+
+ # The import assigns iid values on its own, e.g. by re-using GitHub ids.
+ # Flush existing InternalId records for this project for consistency reasons.
+ # Those records are going to be recreated with the next normal creation
+ # of a model instance (e.g. an Issue).
+ InternalId.flush_records!(project: self)
+
import_state.finish
import_state.remove_jid
update_project_counter_caches
diff --git a/changelogs/unreleased/ab-54270-github-iid.yml b/changelogs/unreleased/ab-54270-github-iid.yml
new file mode 100644
index 00000000000..1776b0aeb86
--- /dev/null
+++ b/changelogs/unreleased/ab-54270-github-iid.yml
@@ -0,0 +1,5 @@
+---
+title: Improve efficiency of GitHub importer by reducing amount of locks needed.
+merge_request: 24102
+author:
+type: performance
diff --git a/db/post_migrate/20190102152410_delete_inconsistent_internal_id_records2.rb b/db/post_migrate/20190102152410_delete_inconsistent_internal_id_records2.rb
new file mode 100644
index 00000000000..ddcddcf72a3
--- /dev/null
+++ b/db/post_migrate/20190102152410_delete_inconsistent_internal_id_records2.rb
@@ -0,0 +1,43 @@
+# frozen_string_literal: true
+class DeleteInconsistentInternalIdRecords2 < ActiveRecord::Migration[5.0]
+ include Gitlab::Database::MigrationHelpers
+
+ DOWNTIME = false
+
+ disable_ddl_transaction!
+
+ # This migration cleans up any inconsistent records in internal_ids.
+ #
+ # That is, it deletes records that track a `last_value` that is
+ # smaller than the maximum internal id (usually `iid`) found in
+ # the corresponding model records.
+
+ def up
+ disable_statement_timeout do
+ delete_internal_id_records('milestones', 'project_id')
+ delete_internal_id_records('milestones', 'namespace_id', 'group_id')
+ end
+ end
+
+ class InternalId < ActiveRecord::Base
+ self.table_name = 'internal_ids'
+ enum usage: { issues: 0, merge_requests: 1, deployments: 2, milestones: 3, epics: 4, ci_pipelines: 5 }
+ end
+
+ private
+
+ def delete_internal_id_records(base_table, scope_column_name, base_scope_column_name = scope_column_name)
+ sql = <<~SQL
+ SELECT id FROM ( -- workaround for MySQL
+ SELECT internal_ids.id FROM (
+ SELECT #{base_scope_column_name} AS #{scope_column_name}, max(iid) as maximum_iid from #{base_table} GROUP BY #{scope_column_name}
+ ) maxima JOIN internal_ids USING (#{scope_column_name})
+ WHERE internal_ids.usage=#{InternalId.usages.fetch(base_table)} AND maxima.maximum_iid > internal_ids.last_value
+ ) internal_ids
+ SQL
+
+ InternalId.where("id IN (#{sql})").tap do |ids| # rubocop:disable GitlabSecurity/SqlInjection
+ say "Deleting internal_id records for #{base_table}: #{ids.map { |i| [i.project_id, i.last_value] }}" unless ids.empty?
+ end.delete_all
+ end
+end
diff --git a/lib/gitlab/github_import/bulk_importing.rb b/lib/gitlab/github_import/bulk_importing.rb
index da2f96b5c4b..147597289cf 100644
--- a/lib/gitlab/github_import/bulk_importing.rb
+++ b/lib/gitlab/github_import/bulk_importing.rb
@@ -15,12 +15,10 @@ module Gitlab
end
# Bulk inserts the given rows into the database.
- def bulk_insert(model, rows, batch_size: 100, pre_hook: nil)
+ def bulk_insert(model, rows, batch_size: 100)
rows.each_slice(batch_size) do |slice|
- pre_hook.call(slice) if pre_hook
Gitlab::Database.bulk_insert(model.table_name, slice)
end
- rows
end
end
end
diff --git a/lib/gitlab/github_import/importer/issue_importer.rb b/lib/gitlab/github_import/importer/issue_importer.rb
index 4226eee85cc..656d46b6a7d 100644
--- a/lib/gitlab/github_import/importer/issue_importer.rb
+++ b/lib/gitlab/github_import/importer/issue_importer.rb
@@ -57,11 +57,7 @@ module Gitlab
updated_at: issue.updated_at
}
- insert_and_return_id(attributes, project.issues).tap do |id|
- # We use .insert_and_return_id which effectively disables all callbacks.
- # Trigger iid logic here to make sure we track internal id values consistently.
- project.issues.find(id).ensure_project_iid!
- end
+ insert_and_return_id(attributes, project.issues)
rescue ActiveRecord::InvalidForeignKey
# It's possible the project has been deleted since scheduling this
# job. In this case we'll just skip creating the issue.
diff --git a/lib/gitlab/github_import/importer/milestones_importer.rb b/lib/gitlab/github_import/importer/milestones_importer.rb
index 8d54b27374c..87cf2c8b598 100644
--- a/lib/gitlab/github_import/importer/milestones_importer.rb
+++ b/lib/gitlab/github_import/importer/milestones_importer.rb
@@ -19,20 +19,10 @@ module Gitlab
# rubocop: enable CodeReuse/ActiveRecord
def execute
- # We insert records in bulk, by-passing any standard model callbacks.
- # The pre_hook here makes sure we track internal ids consistently.
- # Note this has to be called before performing an insert of a batch
- # because we're outside a transaction scope here.
- bulk_insert(Milestone, build_milestones, pre_hook: method(:track_greatest_iid))
+ bulk_insert(Milestone, build_milestones)
build_milestones_cache
end
- def track_greatest_iid(slice)
- greatest_iid = slice.max { |e| e[:iid] }[:iid]
-
- InternalId.track_greatest(nil, { project: project }, :milestones, greatest_iid, ->(_) { project.milestones.maximum(:iid) })
- end
-
def build_milestones
build_database_rows(each_milestone)
end
diff --git a/lib/gitlab/import/merge_request_helpers.rb b/lib/gitlab/import/merge_request_helpers.rb
index 9215067d973..fa3ff6c3f12 100644
--- a/lib/gitlab/import/merge_request_helpers.rb
+++ b/lib/gitlab/import/merge_request_helpers.rb
@@ -24,10 +24,6 @@ module Gitlab
merge_request = project.merge_requests.reload.find(merge_request_id)
- # We use .insert_and_return_id which effectively disables all callbacks.
- # Trigger iid logic here to make sure we track internal id values consistently.
- merge_request.ensure_target_project_iid!
-
[merge_request, false]
end
rescue ActiveRecord::InvalidForeignKey
diff --git a/spec/lib/gitlab/github_import/bulk_importing_spec.rb b/spec/lib/gitlab/github_import/bulk_importing_spec.rb
index 861710f7e9b..91229d9c7d4 100644
--- a/spec/lib/gitlab/github_import/bulk_importing_spec.rb
+++ b/spec/lib/gitlab/github_import/bulk_importing_spec.rb
@@ -58,17 +58,5 @@ describe Gitlab::GithubImport::BulkImporting do
importer.bulk_insert(model, rows, batch_size: 5)
end
-
- it 'calls pre_hook for each slice if given' do
- rows = [{ title: 'Foo' }] * 10
- model = double(:model, table_name: 'kittens')
- pre_hook = double('pre_hook', call: nil)
- allow(Gitlab::Database).to receive(:bulk_insert)
-
- expect(pre_hook).to receive(:call).with(rows[0..4])
- expect(pre_hook).to receive(:call).with(rows[5..9])
-
- importer.bulk_insert(model, rows, batch_size: 5, pre_hook: pre_hook)
- end
end
end
diff --git a/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb b/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb
index 65a2e1cb5cb..7901ae005d9 100644
--- a/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb
+++ b/spec/lib/gitlab/github_import/importer/issue_importer_spec.rb
@@ -78,11 +78,6 @@ describe Gitlab::GithubImport::Importer::IssueImporter, :clean_gitlab_redis_cach
.to receive(:id_for)
.with(issue)
.and_return(milestone.id)
-
- allow(importer.user_finder)
- .to receive(:author_id_for)
- .with(issue)
- .and_return([user.id, true])
end
context 'when the issue author could be found' do
@@ -177,23 +172,6 @@ describe Gitlab::GithubImport::Importer::IssueImporter, :clean_gitlab_redis_cach
expect(importer.create_issue).to be_a_kind_of(Numeric)
end
-
- it 'triggers internal_id functionality to track greatest iids' do
- allow(importer.user_finder)
- .to receive(:author_id_for)
- .with(issue)
- .and_return([user.id, true])
-
- issue = build_stubbed(:issue, project: project)
- allow(importer)
- .to receive(:insert_and_return_id)
- .and_return(issue.id)
- allow(project.issues).to receive(:find).with(issue.id).and_return(issue)
-
- expect(issue).to receive(:ensure_project_iid!)
-
- importer.create_issue
- end
end
describe '#create_assignees' do
diff --git a/spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb b/spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb
index db0be760c7b..b1cac3b6e46 100644
--- a/spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb
+++ b/spec/lib/gitlab/github_import/importer/milestones_importer_spec.rb
@@ -29,25 +29,13 @@ describe Gitlab::GithubImport::Importer::MilestonesImporter, :clean_gitlab_redis
expect(importer)
.to receive(:bulk_insert)
- .with(Milestone, [milestone_hash], any_args)
+ .with(Milestone, [milestone_hash])
expect(importer)
.to receive(:build_milestones_cache)
importer.execute
end
-
- it 'tracks internal ids' do
- milestone_hash = { iid: 1, title: '1.0', project_id: project.id }
- allow(importer)
- .to receive(:build_milestones)
- .and_return([milestone_hash])
-
- expect(InternalId).to receive(:track_greatest)
- .with(nil, { project: project }, :milestones, 1, any_args)
-
- importer.execute
- end
end
describe '#build_milestones' do
diff --git a/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb b/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb
index 25684ea9e2c..0f21b8843b6 100644
--- a/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb
+++ b/spec/lib/gitlab/github_import/importer/pull_request_importer_spec.rb
@@ -111,16 +111,6 @@ describe Gitlab::GithubImport::Importer::PullRequestImporter, :clean_gitlab_redi
expect(mr).to be_instance_of(MergeRequest)
expect(exists).to eq(false)
end
-
- it 'triggers internal_id functionality to track greatest iids' do
- mr = build_stubbed(:merge_request, source_project: project, target_project: project)
- allow(importer).to receive(:insert_and_return_id).and_return(mr.id)
- allow(project.merge_requests).to receive(:find).with(mr.id).and_return(mr)
-
- expect(mr).to receive(:ensure_target_project_iid!)
-
- importer.create_merge_request
- end
end
context 'when the author could not be found' do
diff --git a/spec/models/internal_id_spec.rb b/spec/models/internal_id_spec.rb
index 4696341c05f..d32f163f05b 100644
--- a/spec/models/internal_id_spec.rb
+++ b/spec/models/internal_id_spec.rb
@@ -13,6 +13,29 @@ describe InternalId do
it { is_expected.to validate_presence_of(:usage) }
end
+ describe '.flush_records!' do
+ subject { described_class.flush_records!(project: project) }
+
+ let(:another_project) { create(:project) }
+
+ before do
+ create_list(:issue, 2, project: project)
+ create_list(:issue, 2, project: another_project)
+ end
+
+ it 'deletes all records for the given project' do
+ expect { subject }.to change { described_class.where(project: project).count }.from(1).to(0)
+ end
+
+ it 'retains records for other projects' do
+ expect { subject }.not_to change { described_class.where(project: another_project).count }
+ end
+
+ it 'does not allow an empty filter' do
+ expect { described_class.flush_records!({}) }.to raise_error(/filter cannot be empty/)
+ end
+ end
+
describe '.generate_next' do
subject { described_class.generate_next(issue, scope, usage, init) }
diff --git a/spec/models/project_spec.rb b/spec/models/project_spec.rb
index 4b061b5e24f..7d3f2dfe374 100644
--- a/spec/models/project_spec.rb
+++ b/spec/models/project_spec.rb
@@ -3767,6 +3767,7 @@ describe Project do
expect(import_state).to receive(:remove_jid)
expect(project).to receive(:after_create_default_branch)
expect(project).to receive(:refresh_markdown_cache!)
+ expect(InternalId).to receive(:flush_records!).with(project: project)
project.after_import
end