diff options
Diffstat (limited to 'lib/gitlab/import_export/project_tree_restorer.rb')
-rw-r--r-- | lib/gitlab/import_export/project_tree_restorer.rb | 79 |
1 files changed, 53 insertions, 26 deletions
diff --git a/lib/gitlab/import_export/project_tree_restorer.rb b/lib/gitlab/import_export/project_tree_restorer.rb index cbc8d170936..3bc095a99a9 100644 --- a/lib/gitlab/import_export/project_tree_restorer.rb +++ b/lib/gitlab/import_export/project_tree_restorer.rb @@ -9,6 +9,8 @@ module Gitlab @user = user @shared = shared @project = project + @project_id = project.id + @saved = true end def restore @@ -22,8 +24,10 @@ module Gitlab @project_members = @tree_hash.delete('project_members') - ActiveRecord::Base.no_touching do - create_relations + ActiveRecord::Base.uncached do + ActiveRecord::Base.no_touching do + create_relations + end end rescue => e @shared.error(e) @@ -48,21 +52,24 @@ module Gitlab # the configuration yaml file too. # Finally, it updates each attribute in the newly imported project. def create_relations - saved = [] default_relation_list.each do |relation| - next unless relation.is_a?(Hash) || @tree_hash[relation.to_s].present? + if relation.is_a?(Hash) + create_sub_relations(relation, @tree_hash) + elsif @tree_hash[relation.to_s].present? + save_relation_hash(@tree_hash[relation.to_s], relation) + end + end - create_sub_relations(relation, @tree_hash) if relation.is_a?(Hash) + @saved + end - relation_key = relation.is_a?(Hash) ? relation.keys.first : relation - relation_hash_list = @tree_hash[relation_key.to_s] + def save_relation_hash(relation_hash_batch, relation_key) + relation_hash = create_relation(relation_key, relation_hash_batch) - next unless relation_hash_list + @saved = false unless restored_project.append_or_update_attribute(relation_key, relation_hash) - relation_hash = create_relation(relation_key, relation_hash_list) - saved << restored_project.append_or_update_attribute(relation_key, relation_hash) - end - saved.all? + # Restore the project again, extra query that skips holding the AR objects in memory + @restored_project = Project.find(@project_id) end def default_relation_list @@ -93,20 +100,42 @@ module Gitlab # issue, finds any subrelations such as notes, creates them and assign them back to the hash # # Recursively calls this method if the sub-relation is a hash containing more sub-relations - def create_sub_relations(relation, tree_hash) + def create_sub_relations(relation, tree_hash, save: true) relation_key = relation.keys.first.to_s return if tree_hash[relation_key].blank? - [tree_hash[relation_key]].flatten.each do |relation_item| - relation.values.flatten.each do |sub_relation| - # We just use author to get the user ID, do not attempt to create an instance. - next if sub_relation == :author + tree_array = [tree_hash[relation_key]].flatten + + # Avoid keeping a possible heavy object in memory once we are done with it + while relation_item = tree_array.shift + # The transaction at this level is less speedy than one single transaction + # But we can't have it in the upper level or GC won't get rid of the AR objects + # after we save the batch. + Project.transaction do + process_sub_relation(relation, relation_item) + + # For every subrelation that hangs from Project, save the associated records alltogether + # This effectively batches all records per subrelation item, only keeping those in memory + # We have to keep in mind that more batch granularity << Memory, but >> Slowness + if save + save_relation_hash([relation_item], relation_key) + tree_hash[relation_key].delete(relation_item) + end + end + end + + tree_hash.delete(relation_key) if save + end + + def process_sub_relation(relation, relation_item) + relation.values.flatten.each do |sub_relation| + # We just use author to get the user ID, do not attempt to create an instance. + next if sub_relation == :author - create_sub_relations(sub_relation, relation_item) if sub_relation.is_a?(Hash) + create_sub_relations(sub_relation, relation_item, save: false) if sub_relation.is_a?(Hash) - relation_hash, sub_relation = assign_relation_hash(relation_item, sub_relation) - relation_item[sub_relation.to_s] = create_relation(sub_relation, relation_hash) unless relation_hash.blank? - end + relation_hash, sub_relation = assign_relation_hash(relation_item, sub_relation) + relation_item[sub_relation.to_s] = create_relation(sub_relation, relation_hash) unless relation_hash.blank? end end @@ -121,14 +150,12 @@ module Gitlab end def create_relation(relation, relation_hash_list) - relation_type = relation.to_sym - relation_array = [relation_hash_list].flatten.map do |relation_hash| - Gitlab::ImportExport::RelationFactory.create(relation_sym: relation_type, - relation_hash: parsed_relation_hash(relation_hash, relation_type), + Gitlab::ImportExport::RelationFactory.create(relation_sym: relation.to_sym, + relation_hash: parsed_relation_hash(relation_hash, relation.to_sym), members_mapper: members_mapper, user: @user, - project: restored_project) + project: @restored_project) end.compact relation_hash_list.is_a?(Array) ? relation_array : relation_array.first |