diff options
author | Bob Van Landuyt <bob@vanlanduyt.co> | 2017-09-29 19:02:02 +0200 |
---|---|---|
committer | Bob Van Landuyt <bob@vanlanduyt.co> | 2017-10-07 11:46:23 +0200 |
commit | df7f530d843ca03cfdff65b2bb230e00ec60b371 (patch) | |
tree | 7f2d9939eb2a3ec44166e618090740b3f9a0e032 | |
parent | e8ca579d88703aeeaa64dbf4ac45f73a60181568 (diff) | |
download | gitlab-ce-df7f530d843ca03cfdff65b2bb230e00ec60b371.tar.gz |
Add a migration to populate fork networks
This uses the existing ForkedProjectLinks
5 files changed, 346 insertions, 0 deletions
# ==> db/migrate/20170929131201_populate_fork_networks.rb <==

# See http://doc.gitlab.com/ce/development/migration_style_guide.html
# for more information on how to write migrations for GitLab.

# Schedules the `PopulateForkNetworksRange` background migration over the
# existing rows of `forked_project_links`, passing BATCH_SIZE and
# DELAY_INTERVAL to the scheduling helper.
class PopulateForkNetworks < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false

  # Name of the background migration class that handles one id range.
  MIGRATION = 'PopulateForkNetworksRange'.freeze
  BATCH_SIZE = 100
  DELAY_INTERVAL = 15.seconds

  disable_ddl_transaction!

  # Migration-local model so this migration does not depend on the
  # application's own model for the `forked_project_links` table.
  class ForkedProjectLink < ActiveRecord::Base
    include EachBatch

    self.table_name = 'forked_project_links'
  end

  # Queues the background jobs; no rows are written by this migration itself.
  def up
    say 'Populating the `fork_networks` based on existing `forked_project_links`'

    queue_background_migration_jobs_by_range_at_intervals(ForkedProjectLink, MIGRATION, DELAY_INTERVAL, batch_size: BATCH_SIZE)
  end

  # Intentionally a no-op: the populated rows are left in place on rollback.
  def down
    # nothing
  end
end

# ==> lib/gitlab/background_migration/create_fork_network_memberships_range.rb <==

module Gitlab
  module BackgroundMigration
    # Creates `fork_network_members` rows for the forks referenced by
    # `forked_project_links` rows whose id lies in a given range.
    #
    # A fork can only be added once the project it was forked from is already
    # a member of a network (the INSERT joins on the source's membership), so
    # every pass can only add forks whose source was a member before that
    # pass started. The job therefore reschedules itself for the same range
    # until no link in the range is missing a membership.
    class CreateForkNetworkMembershipsRange
      # Delay handed to `BackgroundMigrationWorker.perform_in` when rescheduling.
      RESCHEDULE_DELAY = 15

      # Migration-local model for the `forked_project_links` table.
      class ForkedProjectLink < ActiveRecord::Base
        self.table_name = 'forked_project_links'
      end

      # Inserts the memberships that can currently be created for the links
      # in the range and reschedules itself when some are still missing.
      #
      # start_id - first `forked_project_links.id` of the range (inclusive).
      # end_id   - last `forked_project_links.id` of the range (inclusive).
      #
      # NOTE(review): a link whose source project never becomes a network
      # member keeps `missing_members?` true for this range, so the job would
      # be rescheduled indefinitely -- confirm whether such links can exist.
      def perform(start_id, end_id)
        log("Creating memberships for forks: #{start_id} - #{end_id}")

        # The NOT EXISTS guard skips forks that already have a membership,
        # which makes re-running the same range harmless.
        ActiveRecord::Base.connection.execute <<~INSERT_MEMBERS
          INSERT INTO fork_network_members (fork_network_id, project_id, forked_from_project_id)

          SELECT fork_network_members.fork_network_id,
                 forked_project_links.forked_to_project_id,
                 forked_project_links.forked_from_project_id

          FROM forked_project_links

          INNER JOIN fork_network_members
          ON forked_project_links.forked_from_project_id = fork_network_members.project_id

          WHERE forked_project_links.id BETWEEN #{start_id} AND #{end_id}
          AND NOT EXISTS (
            SELECT true
            FROM fork_network_members existing_members
            WHERE existing_members.project_id = forked_project_links.forked_to_project_id
          )
        INSERT_MEMBERS

        if missing_members?(start_id, end_id)
          BackgroundMigrationWorker.perform_in(RESCHEDULE_DELAY, "CreateForkNetworkMembershipsRange", [start_id, end_id])
        end
      end

      # Returns true when at least one link in the range points at a fork
      # that has no `fork_network_members` row yet.
      #
      # start_id - first `forked_project_links.id` of the range (inclusive).
      # end_id   - last `forked_project_links.id` of the range (inclusive).
      def missing_members?(start_id, end_id)
        count_sql = <<~MISSING_MEMBERS
          SELECT COUNT(*)

          FROM forked_project_links

          WHERE NOT EXISTS (
            SELECT true
            FROM fork_network_members
            WHERE fork_network_members.project_id = forked_project_links.forked_to_project_id
          )
          AND forked_project_links.id BETWEEN #{start_id} AND #{end_id}
        MISSING_MEMBERS

        # Run the raw count through the migration-local model rather than the
        # application's ForkNetworkMember model, so this job keeps working if
        # that model changes or is removed. The SQL itself is unchanged.
        ForkedProjectLink.count_by_sql(count_sql) > 0
      end

      # Writes an info-level log line prefixed with this class's name.
      def log(message)
        Rails.logger.info("#{self.class.name} - #{message}")
      end
    end
  end
end

# ==> lib/gitlab/background_migration/populate_fork_networks_range.rb <==

module Gitlab
  module BackgroundMigration
    # Creates `fork_networks` rows, and the root project's membership in each,
    # for the `forked_project_links` rows whose id lies in a given range.
    # Memberships for the forks themselves are delegated to
    # `CreateForkNetworkMembershipsRange`, scheduled at the end of `perform`.
    class PopulateForkNetworksRange
      # start_id - first `forked_project_links.id` of the range (inclusive).
      # end_id   - last `forked_project_links.id` of the range (inclusive).
      def perform(start_id, end_id)
        log("Creating fork networks for forked project links: #{start_id} - #{end_id}")

        # A source project is treated as a network root when no link lists it
        # as a fork target. Roots that already have a network are skipped.
        ActiveRecord::Base.connection.execute <<~INSERT_NETWORKS
          INSERT INTO fork_networks (root_project_id)
          SELECT DISTINCT forked_project_links.forked_from_project_id

          FROM forked_project_links

          WHERE NOT EXISTS (
            SELECT true
            FROM forked_project_links inner_links
            WHERE inner_links.forked_to_project_id = forked_project_links.forked_from_project_id
          )
          AND NOT EXISTS (
            SELECT true
            FROM fork_networks
            WHERE forked_project_links.forked_from_project_id = fork_networks.root_project_id
          )
          AND forked_project_links.id BETWEEN #{start_id} AND #{end_id}
        INSERT_NETWORKS

        log("Creating memberships for root projects: #{start_id} - #{end_id}")

        # Adds each root project as a member of its own network, unless the
        # project already has a membership row.
        ActiveRecord::Base.connection.execute <<~INSERT_ROOT
          INSERT INTO fork_network_members (fork_network_id, project_id)
          SELECT DISTINCT fork_networks.id, fork_networks.root_project_id

          FROM fork_networks

          INNER JOIN forked_project_links
          ON forked_project_links.forked_from_project_id = fork_networks.root_project_id

          WHERE NOT EXISTS (
            SELECT true
            FROM fork_network_members
            WHERE fork_network_members.project_id = fork_networks.root_project_id
          )
          AND forked_project_links.id BETWEEN #{start_id} AND #{end_id}
        INSERT_ROOT

        # Hand the same range over to the job that creates the memberships of
        # the forks themselves.
        delay = BackgroundMigration::CreateForkNetworkMembershipsRange::RESCHEDULE_DELAY
        BackgroundMigrationWorker.perform_in(delay, "CreateForkNetworkMembershipsRange", [start_id, end_id])
      end

      # Writes an info-level log line prefixed with this class's name.
      def log(message)
        Rails.logger.info("#{self.class.name} - #{message}")
      end
    end
  end
end

# ==> spec/lib/gitlab/background_migration/create_fork_network_memberships_range_spec.rb <==

require 'spec_helper'

describe Gitlab::BackgroundMigration::CreateForkNetworkMembershipsRange, :migration, schema: 20170929131201 do
  let(:migration) { described_class.new }

  let(:base1) { create(:project) }
  let(:base1_fork1) { create(:project) }
  let(:base1_fork2) { create(:project) }

  let(:base2) { create(:project) }
  let(:base2_fork1) { create(:project) }
  let(:base2_fork2) { create(:project) }

  let(:fork_of_fork) { create(:project) }
  let(:fork_of_fork2) { create(:project) }
  let(:second_level_fork) { create(:project) }
  let(:third_level_fork) { create(:project) }

  let(:fork_network1) { fork_networks.find_by(root_project_id: base1.id) }
  let(:fork_network2) { fork_networks.find_by(root_project_id: base2.id) }

  let!(:forked_project_links) { table(:forked_project_links) }
  let!(:fork_networks) { table(:fork_networks) }
  let!(:fork_network_members) { table(:fork_network_members) }

  before do
    # The fork-network relation created for the forked project
    fork_networks.create(id: 1, root_project_id: base1.id)
    fork_network_members.create(project_id: base1.id, fork_network_id: 1)
    fork_networks.create(id: 2, root_project_id: base2.id)
    fork_network_members.create(project_id: base2.id, fork_network_id: 2)

    # Normal fork links
    forked_project_links.create(id: 1, forked_from_project_id: base1.id, forked_to_project_id: base1_fork1.id)
    forked_project_links.create(id: 2, forked_from_project_id: base1.id, forked_to_project_id: base1_fork2.id)
    forked_project_links.create(id: 3, forked_from_project_id: base2.id, forked_to_project_id: base2_fork1.id)
    forked_project_links.create(id: 4, forked_from_project_id: base2.id, forked_to_project_id: base2_fork2.id)

    # Fork links
    forked_project_links.create(id: 5, forked_from_project_id: base1_fork1.id, forked_to_project_id: fork_of_fork.id)
    forked_project_links.create(id: 6, forked_from_project_id: base1_fork1.id, forked_to_project_id: fork_of_fork2.id)

    # Forks 3 levels down
    forked_project_links.create(id: 7, forked_from_project_id: fork_of_fork.id, forked_to_project_id: second_level_fork.id)
    forked_project_links.create(id: 8, forked_from_project_id: second_level_fork.id, forked_to_project_id: third_level_fork.id)

    migration.perform(1, 8)
  end

  it 'creates a memberships for the direct forks' do
    base1_fork1_membership = fork_network_members.find_by(fork_network_id: fork_network1.id,
                                                          project_id: base1_fork1.id)
    base1_fork2_membership = fork_network_members.find_by(fork_network_id: fork_network1.id,
                                                          project_id: base1_fork2.id)
    base2_fork1_membership = fork_network_members.find_by(fork_network_id: fork_network2.id,
                                                          project_id: base2_fork1.id)
    base2_fork2_membership = fork_network_members.find_by(fork_network_id: fork_network2.id,
                                                          project_id: base2_fork2.id)

    expect(base1_fork1_membership.forked_from_project_id).to eq(base1.id)
    expect(base1_fork2_membership.forked_from_project_id).to eq(base1.id)
    expect(base2_fork1_membership.forked_from_project_id).to eq(base2.id)
    expect(base2_fork2_membership.forked_from_project_id).to eq(base2.id)
  end

  it 'adds the fork network members for forks of forks' do
    fork_of_fork_membership = fork_network_members.find_by(project_id: fork_of_fork.id,
                                                           fork_network_id: fork_network1.id)
    fork_of_fork2_membership = fork_network_members.find_by(project_id: fork_of_fork2.id,
                                                            fork_network_id: fork_network1.id)
    second_level_fork_membership = fork_network_members.find_by(project_id: second_level_fork.id,
                                                                fork_network_id: fork_network1.id)
    third_level_fork_membership = fork_network_members.find_by(project_id: third_level_fork.id,
                                                               fork_network_id: fork_network1.id)

    expect(fork_of_fork_membership.forked_from_project_id).to eq(base1_fork1.id)
    expect(fork_of_fork2_membership.forked_from_project_id).to eq(base1_fork1.id)
    expect(second_level_fork_membership.forked_from_project_id).to eq(fork_of_fork.id)
    expect(third_level_fork_membership.forked_from_project_id).to eq(second_level_fork.id)
  end

  it 'reschedules itself when there are missing members' do
    allow(migration).to receive(:missing_members?).and_return(true)

    expect(BackgroundMigrationWorker)
      .to receive(:perform_in).with(described_class::RESCHEDULE_DELAY, "CreateForkNetworkMembershipsRange", [1, 3])

    migration.perform(1, 3)
  end

  it 'can be repeated without effect' do
    # The action goes in `expect {}` and the observed value in `change {}`.
    # With the two swapped the example only compared the return value of
    # `perform` with itself and could never fail.
    expect { migration.perform(1, 7) }.not_to change { fork_network_members.count }
  end

  it 'knows it is finished for this range' do
    expect(migration.missing_members?(1, 7)).to be_falsy
  end

  context 'with more forks' do
    before do
      forked_project_links.create(id: 9, forked_from_project_id: fork_of_fork.id, forked_to_project_id: create(:project).id)
      forked_project_links.create(id: 10, forked_from_project_id: fork_of_fork.id, forked_to_project_id: create(:project).id)
    end

    it 'only processes a single batch of links at a time' do
      expect(fork_network_members.count).to eq(10)

      migration.perform(8, 10)

      expect(fork_network_members.count).to eq(12)
    end

    it 'knows when not all memberships withing a batch have been created' do
      expect(migration.missing_members?(8, 10)).to be_truthy
    end
  end
end

# ==> spec/lib/gitlab/background_migration/populate_fork_networks_range_spec.rb <==

require 'spec_helper'

describe Gitlab::BackgroundMigration::PopulateForkNetworksRange, :migration, schema: 20170929131201 do
  let(:migration) { described_class.new }
  let(:base1) { create(:project) }
  let(:base1_fork1) { create(:project) }
  let(:base1_fork2) { create(:project) }

  let(:base2) { create(:project) }
  let(:base2_fork1) { create(:project) }
  let(:base2_fork2) { create(:project) }

  let!(:forked_project_links) { table(:forked_project_links) }
  let!(:fork_networks) { table(:fork_networks) }
  let!(:fork_network_members) { table(:fork_network_members) }

  let(:fork_network1) { fork_networks.find_by(root_project_id: base1.id) }
  let(:fork_network2) { fork_networks.find_by(root_project_id: base2.id) }

  before do
    # A normal fork link
    forked_project_links.create(id: 1,
                                forked_from_project_id: base1.id,
                                forked_to_project_id: base1_fork1.id)
    forked_project_links.create(id: 2,
                                forked_from_project_id: base1.id,
                                forked_to_project_id: base1_fork2.id)

    forked_project_links.create(id: 3,
                                forked_from_project_id: base2.id,
                                forked_to_project_id: base2_fork1.id)
    forked_project_links.create(id: 4,
                                forked_from_project_id: base2_fork1.id,
                                forked_to_project_id: create(:project).id)

    forked_project_links.create(id: 5,
                                forked_from_project_id: base2.id,
                                forked_to_project_id: base2_fork2.id)

    migration.perform(1, 3)
  end

  it 'it creates the fork network' do
    expect(fork_network1).not_to be_nil
    expect(fork_network2).not_to be_nil
  end

  it 'does not create a fork network for a fork-of-fork' do
    # perform the entire batch
    migration.perform(1, 5)

    expect(fork_networks.find_by(root_project_id: base2_fork1.id)).to be_nil
  end

  it 'creates memberships for the root of fork networks' do
    base1_membership = fork_network_members.find_by(fork_network_id: fork_network1.id,
                                                    project_id: base1.id)
    base2_membership = fork_network_members.find_by(fork_network_id: fork_network2.id,
                                                    project_id: base2.id)

    expect(base1_membership).not_to be_nil
    expect(base2_membership).not_to be_nil
  end

  it 'schedules a job for inserting memberships for forks-of-forks' do
    delay = Gitlab::BackgroundMigration::CreateForkNetworkMembershipsRange::RESCHEDULE_DELAY

    expect(BackgroundMigrationWorker)
      .to receive(:perform_in).with(delay, "CreateForkNetworkMembershipsRange", [1, 3])

    migration.perform(1, 3)
  end

  it 'only processes a single batch of links at a time' do
    expect(fork_network_members.count).to eq(5)

    migration.perform(3, 5)

    expect(fork_network_members.count).to eq(7)
  end

  it 'can be repeated without effect' do
    expect { migration.perform(1, 3) }.not_to change { fork_network_members.count }
  end
end