diff options
Diffstat (limited to 'lib/gitlab/import_export/project_tree_restorer.rb')
-rw-r--r-- | lib/gitlab/import_export/project_tree_restorer.rb | 198 |
1 files changed, 92 insertions, 106 deletions
diff --git a/lib/gitlab/import_export/project_tree_restorer.rb b/lib/gitlab/import_export/project_tree_restorer.rb index 9433a231b4a..c401f96b5c1 100644 --- a/lib/gitlab/import_export/project_tree_restorer.rb +++ b/lib/gitlab/import_export/project_tree_restorer.rb @@ -15,7 +15,6 @@ module Gitlab @user = user @shared = shared @project = project - @saved = true end def restore @@ -33,7 +32,8 @@ module Gitlab ActiveRecord::Base.uncached do ActiveRecord::Base.no_touching do update_project_params! - create_relations + create_project_relations! + post_import! end end @@ -69,77 +69,75 @@ module Gitlab # in the DB. The structure and relationships between models are guessed from # the configuration yaml file too. # Finally, it updates each attribute in the newly imported project. - def create_relations - project_relations.each do |relation_key, relation_definition| - relation_key_s = relation_key.to_s - - if relation_definition.present? - create_sub_relations(relation_key_s, relation_definition, @tree_hash) - elsif @tree_hash[relation_key_s].present? - save_relation_hash(relation_key_s, @tree_hash[relation_key_s]) - end - end + def create_project_relations! + project_relations.each(&method( + :process_project_relation!)) + end + def post_import! @project.merge_requests.set_latest_merge_request_diff_ids! 
- - @saved end - def save_relation_hash(relation_key, relation_hash_batch) - relation_hash = create_relation(relation_key, relation_hash_batch) + def process_project_relation!(relation_key, relation_definition) + data_hashes = @tree_hash.delete(relation_key) + return unless data_hashes - remove_group_models(relation_hash) if relation_hash.is_a?(Array) + # we do not care if we process array or hash + data_hashes = [data_hashes] unless data_hashes.is_a?(Array) + + # consume and remove objects from memory + while data_hash = data_hashes.shift + process_project_relation_item!(relation_key, relation_definition, data_hash) + end + end - @saved = false unless @project.append_or_update_attribute(relation_key, relation_hash) + def process_project_relation_item!(relation_key, relation_definition, data_hash) + relation_object = build_relation(relation_key, relation_definition, data_hash) + return unless relation_object + return if group_model?(relation_object) - save_id_mappings(relation_key, relation_hash_batch, relation_hash) + relation_object.project = @project + relation_object.save! - @project.reset + save_id_mapping(relation_key, data_hash, relation_object) end # Older, serialized CI pipeline exports may only have a # merge_request_id and not the full hash of the merge request. To # import these pipelines, we need to preserve the mapping between # the old and the new merge request ID. - def save_id_mappings(relation_key, relation_hash_batch, relation_hash) + def save_id_mapping(relation_key, data_hash, relation_object) return unless relation_key == 'merge_requests' - relation_hash = Array(relation_hash) - - Array(relation_hash_batch).each_with_index do |raw_data, index| - merge_requests_mapping[raw_data['id']] = relation_hash[index]['id'] - end - end - - # Remove project models that became group models as we found them at group level. - # This no longer required saving them at the root project level. 
- # For example, in the case of an existing group label that matched the title. - def remove_group_models(relation_hash) - relation_hash.reject! do |value| - GROUP_MODELS.include?(value.class) && value.group_id - end + merge_requests_mapping[data_hash['id']] = relation_object.id end def project_relations - @project_relations ||= reader.attributes_finder.find_relations_tree(:project) + @project_relations ||= + reader + .attributes_finder + .find_relations_tree(:project) + .deep_stringify_keys end def update_project_params! - Gitlab::Timeless.timeless(@project) do - project_params = @tree_hash.reject do |key, value| - project_relations.include?(key.to_sym) - end + project_params = @tree_hash.reject do |key, value| + project_relations.include?(key) + end + + project_params = project_params.merge( + present_project_override_params) - project_params = project_params.merge(present_project_override_params) + # Cleaning all imported and overridden params + project_params = Gitlab::ImportExport::AttributeCleaner.clean( + relation_hash: project_params, + relation_class: Project, + excluded_keys: excluded_keys_for_relation(:project)) - # Cleaning all imported and overridden params - project_params = Gitlab::ImportExport::AttributeCleaner.clean( - relation_hash: project_params, - relation_class: Project, - excluded_keys: excluded_keys_for_relation(:project)) + @project.assign_attributes(project_params) + @project.drop_visibility_level! - @project.assign_attributes(project_params) - @project.drop_visibility_level! + Gitlab::Timeless.timeless(@project) do @project.save! 
end end @@ -156,73 +154,61 @@ module Gitlab @project_override_params ||= @project.import_data&.data&.fetch('override_params', nil) || {} end - # Given a relation hash containing one or more models and its relationships, - # loops through each model and each object from a model type and - # and assigns its correspondent attributes hash from +tree_hash+ - # Example: - # +relation_key+ issues, loops through the list of *issues* and for each individual - # issue, finds any subrelations such as notes, creates them and assign them back to the hash - # - # Recursively calls this method if the sub-relation is a hash containing more sub-relations - def create_sub_relations(relation_key, relation_definition, tree_hash, save: true) - return if tree_hash[relation_key].blank? - - tree_array = [tree_hash[relation_key]].flatten - - # Avoid keeping a possible heavy object in memory once we are done with it - while relation_item = tree_array.shift - # The transaction at this level is less speedy than one single transaction - # But we can't have it in the upper level or GC won't get rid of the AR objects - # after we save the batch. 
- Project.transaction do - process_sub_relation(relation_key, relation_definition, relation_item) - - # For every subrelation that hangs from Project, save the associated records altogether - # This effectively batches all records per subrelation item, only keeping those in memory - # We have to keep in mind that more batch granularity << Memory, but >> Slowness - if save - save_relation_hash(relation_key, [relation_item]) - tree_hash[relation_key].delete(relation_item) - end - end - end - - tree_hash.delete(relation_key) if save + def build_relations(relation_key, relation_definition, data_hashes) + data_hashes.map do |data_hash| + build_relation(relation_key, relation_definition, data_hash) + end.compact end - def process_sub_relation(relation_key, relation_definition, relation_item) - relation_definition.each do |sub_relation_key, sub_relation_definition| - # We just use author to get the user ID, do not attempt to create an instance. - next if sub_relation_key == :author + def build_relation(relation_key, relation_definition, data_hash) + # TODO: This is a hack to avoid creating a relation for the author + # Rather make `RelationFactory#set_note_author` take care of that + return data_hash if relation_key == 'author' - sub_relation_key_s = sub_relation_key.to_s + # create relation objects recursively for all sub-objects + relation_definition.each do |sub_relation_key, sub_relation_definition| + transform_sub_relations!(data_hash, sub_relation_key, sub_relation_definition) + end - # create dependent relations if present - if sub_relation_definition.present? 
- create_sub_relations(sub_relation_key_s, sub_relation_definition, relation_item, save: false) + Gitlab::ImportExport::RelationFactory.create( + relation_sym: relation_key.to_sym, + relation_hash: data_hash, + members_mapper: members_mapper, + merge_requests_mapping: merge_requests_mapping, + user: @user, + project: @project, + excluded_keys: excluded_keys_for_relation(relation_key)) + end + + def transform_sub_relations!(data_hash, sub_relation_key, sub_relation_definition) + sub_data_hash = data_hash[sub_relation_key] + return unless sub_data_hash + + # if object is a hash we can create simple object + # as it means that this is 1-to-1 vs 1-to-many + sub_data_hash = + if sub_data_hash.is_a?(Array) + build_relations( + sub_relation_key, + sub_relation_definition, + sub_data_hash).presence + else + build_relation( + sub_relation_key, + sub_relation_definition, + sub_data_hash) end - # transform relation hash to actual object - sub_relation_hash = relation_item[sub_relation_key_s] - if sub_relation_hash.present? - relation_item[sub_relation_key_s] = create_relation(sub_relation_key, sub_relation_hash) - end + # persist object(s) or delete from relation + if sub_data_hash + data_hash[sub_relation_key] = sub_data_hash + else + data_hash.delete(sub_relation_key) end end - def create_relation(relation_key, relation_hash_list) - relation_array = [relation_hash_list].flatten.map do |relation_hash| - Gitlab::ImportExport::RelationFactory.create( - relation_sym: relation_key.to_sym, - relation_hash: relation_hash, - members_mapper: members_mapper, - merge_requests_mapping: merge_requests_mapping, - user: @user, - project: @project, - excluded_keys: excluded_keys_for_relation(relation_key)) - end.compact - - relation_hash_list.is_a?(Array) ? relation_array : relation_array.first + def group_model?(relation_object) + GROUP_MODELS.include?(relation_object.class) && relation_object.group_id end def reader |