diff options
-rw-r--r-- | app/models/ci/build.rb | 4 | ||||
-rw-r--r-- | changelogs/unreleased/fix-import-export-performance.yml | 5 | ||||
-rw-r--r-- | lib/gitlab/import_export/import_export.yml | 1 | ||||
-rw-r--r-- | lib/gitlab/import_export/project_tree_restorer.rb | 79 | ||||
-rw-r--r-- | lib/gitlab/import_export/shared.rb | 2 | ||||
-rw-r--r-- | spec/lib/gitlab/import_export/project_tree_restorer_spec.rb | 4 | ||||
-rw-r--r-- | spec/lib/gitlab/import_export/project_tree_saver_spec.rb | 19 |
7 files changed, 77 insertions, 37 deletions
diff --git a/app/models/ci/build.rb b/app/models/ci/build.rb index ba3156154ac..28c16d4037f 100644 --- a/app/models/ci/build.rb +++ b/app/models/ci/build.rb @@ -451,6 +451,10 @@ module Ci trace end + def serializable_hash(options = {}) + super(options).merge(when: read_attribute(:when)) + end + private def update_artifacts_size diff --git a/changelogs/unreleased/fix-import-export-performance.yml b/changelogs/unreleased/fix-import-export-performance.yml new file mode 100644 index 00000000000..1f59c4eb179 --- /dev/null +++ b/changelogs/unreleased/fix-import-export-performance.yml @@ -0,0 +1,5 @@ +--- +title: Improve Import/Export memory usage +merge_request: +author: +type: fixed diff --git a/lib/gitlab/import_export/import_export.yml b/lib/gitlab/import_export/import_export.yml index 78795dd3d92..ec73846d844 100644 --- a/lib/gitlab/import_export/import_export.yml +++ b/lib/gitlab/import_export/import_export.yml @@ -116,6 +116,7 @@ excluded_attributes: statuses: - :trace - :token + - :when push_event_payload: - :event_id diff --git a/lib/gitlab/import_export/project_tree_restorer.rb b/lib/gitlab/import_export/project_tree_restorer.rb index cbc8d170936..3bc095a99a9 100644 --- a/lib/gitlab/import_export/project_tree_restorer.rb +++ b/lib/gitlab/import_export/project_tree_restorer.rb @@ -9,6 +9,8 @@ module Gitlab @user = user @shared = shared @project = project + @project_id = project.id + @saved = true end def restore @@ -22,8 +24,10 @@ module Gitlab @project_members = @tree_hash.delete('project_members') - ActiveRecord::Base.no_touching do - create_relations + ActiveRecord::Base.uncached do + ActiveRecord::Base.no_touching do + create_relations + end end rescue => e @shared.error(e) @@ -48,21 +52,24 @@ module Gitlab # the configuration yaml file too. # Finally, it updates each attribute in the newly imported project. def create_relations - saved = [] default_relation_list.each do |relation| - next unless relation.is_a?(Hash) || @tree_hash[relation.to_s].present? + if relation.is_a?(Hash) + create_sub_relations(relation, @tree_hash) + elsif @tree_hash[relation.to_s].present? + save_relation_hash(@tree_hash[relation.to_s], relation) + end + end - create_sub_relations(relation, @tree_hash) if relation.is_a?(Hash) + @saved + end - relation_key = relation.is_a?(Hash) ? relation.keys.first : relation - relation_hash_list = @tree_hash[relation_key.to_s] + def save_relation_hash(relation_hash_batch, relation_key) + relation_hash = create_relation(relation_key, relation_hash_batch) - next unless relation_hash_list + @saved = false unless restored_project.append_or_update_attribute(relation_key, relation_hash) - relation_hash = create_relation(relation_key, relation_hash_list) - saved << restored_project.append_or_update_attribute(relation_key, relation_hash) - end - saved.all? + # Restore the project again, extra query that skips holding the AR objects in memory + @restored_project = Project.find(@project_id) end def default_relation_list @@ -93,20 +100,42 @@ module Gitlab # issue, finds any subrelations such as notes, creates them and assign them back to the hash # # Recursively calls this method if the sub-relation is a hash containing more sub-relations - def create_sub_relations(relation, tree_hash) + def create_sub_relations(relation, tree_hash, save: true) relation_key = relation.keys.first.to_s return if tree_hash[relation_key].blank? - [tree_hash[relation_key]].flatten.each do |relation_item| - relation.values.flatten.each do |sub_relation| - # We just use author to get the user ID, do not attempt to create an instance. - next if sub_relation == :author + tree_array = [tree_hash[relation_key]].flatten + + # Avoid keeping a possible heavy object in memory once we are done with it + while relation_item = tree_array.shift + # The transaction at this level is less speedy than one single transaction + # But we can't have it in the upper level or GC won't get rid of the AR objects + # after we save the batch. + Project.transaction do + process_sub_relation(relation, relation_item) + + # For every subrelation that hangs from Project, save the associated records alltogether + # This effectively batches all records per subrelation item, only keeping those in memory + # We have to keep in mind that more batch granularity << Memory, but >> Slowness + if save + save_relation_hash([relation_item], relation_key) + tree_hash[relation_key].delete(relation_item) + end + end + end + + tree_hash.delete(relation_key) if save + end + + def process_sub_relation(relation, relation_item) + relation.values.flatten.each do |sub_relation| + # We just use author to get the user ID, do not attempt to create an instance. + next if sub_relation == :author - create_sub_relations(sub_relation, relation_item) if sub_relation.is_a?(Hash) + create_sub_relations(sub_relation, relation_item, save: false) if sub_relation.is_a?(Hash) - relation_hash, sub_relation = assign_relation_hash(relation_item, sub_relation) - relation_item[sub_relation.to_s] = create_relation(sub_relation, relation_hash) unless relation_hash.blank? - end + relation_hash, sub_relation = assign_relation_hash(relation_item, sub_relation) + relation_item[sub_relation.to_s] = create_relation(sub_relation, relation_hash) unless relation_hash.blank? end end @@ -121,14 +150,12 @@ module Gitlab end def create_relation(relation, relation_hash_list) - relation_type = relation.to_sym - relation_array = [relation_hash_list].flatten.map do |relation_hash| - Gitlab::ImportExport::RelationFactory.create(relation_sym: relation_type, - relation_hash: parsed_relation_hash(relation_hash, relation_type), + Gitlab::ImportExport::RelationFactory.create(relation_sym: relation.to_sym, + relation_hash: parsed_relation_hash(relation_hash, relation.to_sym), members_mapper: members_mapper, user: @user, - project: restored_project) + project: @restored_project) end.compact relation_hash_list.is_a?(Array) ? relation_array : relation_array.first diff --git a/lib/gitlab/import_export/shared.rb b/lib/gitlab/import_export/shared.rb index 5d6de8bc475..9fd0b709ef2 100644 --- a/lib/gitlab/import_export/shared.rb +++ b/lib/gitlab/import_export/shared.rb @@ -16,7 +16,7 @@ module Gitlab error_out(error.message, caller[0].dup) @errors << error.message # Debug: - Rails.logger.error(error.backtrace) + Rails.logger.error(error.backtrace.join("\n")) end private diff --git a/spec/lib/gitlab/import_export/project_tree_restorer_spec.rb b/spec/lib/gitlab/import_export/project_tree_restorer_spec.rb index 5b16fc5d084..d664d371028 100644 --- a/spec/lib/gitlab/import_export/project_tree_restorer_spec.rb +++ b/spec/lib/gitlab/import_export/project_tree_restorer_spec.rb @@ -11,8 +11,8 @@ describe Gitlab::ImportExport::ProjectTreeRestorer do allow(@shared).to receive(:export_path).and_return('spec/lib/gitlab/import_export/') @project = create(:project, :builds_disabled, :issues_disabled, name: 'project', path: 'project') - allow(@project.repository).to receive(:fetch_ref).and_return(true) - allow(@project.repository.raw).to receive(:rugged_branch_exists?).and_return(false) + allow_any_instance_of(Repository).to receive(:fetch_ref).and_return(true) + allow_any_instance_of(Gitlab::Git::Repository).to receive(:branch_exists?).and_return(false) expect_any_instance_of(Gitlab::Git::Repository).to receive(:create_branch).with('feature', 'DCBA') allow_any_instance_of(Gitlab::Git::Repository).to receive(:create_branch) diff --git a/spec/lib/gitlab/import_export/project_tree_saver_spec.rb b/spec/lib/gitlab/import_export/project_tree_saver_spec.rb index 065b0ec6658..8e3554375e8 100644 --- a/spec/lib/gitlab/import_export/project_tree_saver_spec.rb +++ b/spec/lib/gitlab/import_export/project_tree_saver_spec.rb @@ -117,6 +117,13 @@ describe Gitlab::ImportExport::ProjectTreeSaver do expect(saved_project_json['pipelines'].first['statuses'].count { |hash| hash['type'] == 'Ci::Build' }).to eq(1) end + it 'has no when YML attributes but only the DB column' do + allow_any_instance_of(Ci::Pipeline).to receive(:ci_yaml_file).and_return(File.read(Rails.root.join('spec/support/gitlab_stubs/gitlab_ci.yml'))) + expect_any_instance_of(Ci::GitlabCiYamlProcessor).not_to receive(:build_attributes) + + saved_project_json + end + it 'has pipeline commits' do expect(saved_project_json['pipelines']).not_to be_empty end @@ -251,15 +258,11 @@ describe Gitlab::ImportExport::ProjectTreeSaver do create(:label_priority, label: group_label, priority: 1) milestone = create(:milestone, project: project) merge_request = create(:merge_request, source_project: project, milestone: milestone) - commit_status = create(:commit_status, project: project) - ci_pipeline = create(:ci_pipeline, - project: project, - sha: merge_request.diff_head_sha, - ref: merge_request.source_branch, - statuses: [commit_status]) + ci_build = create(:ci_build, project: project, when: nil) + ci_build.pipeline.update(project: project) + create(:commit_status, project: project, pipeline: ci_build.pipeline) - create(:ci_build, pipeline: ci_pipeline, project: project) create(:milestone, project: project) create(:note, noteable: issue, project: project) create(:note, noteable: merge_request, project: project) @@ -267,7 +270,7 @@ describe Gitlab::ImportExport::ProjectTreeSaver do create(:note_on_commit, author: user, project: project, - commit_id: ci_pipeline.sha) + commit_id: ci_build.pipeline.sha) create(:event, :created, target: milestone, project: project, author: user) create(:service, project: project, type: 'CustomIssueTrackerService', category: 'issue_tracker') |