diff options
author | Stan Hu <stanhu@gmail.com> | 2019-08-27 06:07:38 -0700 |
---|---|---|
committer | Stan Hu <stanhu@gmail.com> | 2019-08-27 15:52:33 -0700 |
commit | 2022e6799bcbf119ea80145e4993ffdb7bb108e3 (patch) | |
tree | 19fb831f799ad9353751f7f51b0a5f309f4eead8 /app/services/projects/lfs_pointers | |
parent | a58f4f00cf7cc8b4755d0d20094cbbd547d31d2b (diff) | |
download | gitlab-ce-2022e6799bcbf119ea80145e4993ffdb7bb108e3.tar.gz |
Makes LFS object linker process OIDs in batches (branch: sh-lfs-object-batches)
During a project import, `LfsLinkService` attempts to link `LfsObjects`
that have not already been associated with a project. It's possible for
a large repo to have thousands of OIDs, which can cause long database
query and parsing times. By processing a batch of 1000 at a time, we
can reduce that time at the expense of a few more SQL queries.
Closes https://gitlab.com/gitlab-org/gitlab-ce/issues/66274
Diffstat (limited to 'app/services/projects/lfs_pointers')
-rw-r--r-- | app/services/projects/lfs_pointers/lfs_link_service.rb | 29 |
1 files changed, 24 insertions, 5 deletions
```diff
diff --git a/app/services/projects/lfs_pointers/lfs_link_service.rb b/app/services/projects/lfs_pointers/lfs_link_service.rb
index e3c956250f0..38de2af9c1e 100644
--- a/app/services/projects/lfs_pointers/lfs_link_service.rb
+++ b/app/services/projects/lfs_pointers/lfs_link_service.rb
@@ -4,6 +4,8 @@
 module Projects
   module LfsPointers
     class LfsLinkService < BaseService
+      BATCH_SIZE = 1000
+
       # Accept an array of oids to link
       #
       # Returns an array with the oid of the existent lfs objects
@@ -18,16 +20,33 @@ module Projects
 
       # rubocop: disable CodeReuse/ActiveRecord
       def link_existing_lfs_objects(oids)
-        existent_lfs_objects = LfsObject.where(oid: oids)
+        all_existing_objects = []
+        iterations = 0
+
+        LfsObject.where(oid: oids).each_batch(of: BATCH_SIZE) do |existent_lfs_objects|
+          next unless existent_lfs_objects.any?
+
+          iterations += 1
+          not_linked_lfs_objects = existent_lfs_objects.where.not(id: project.all_lfs_objects)
+          project.all_lfs_objects << not_linked_lfs_objects
 
-        return [] unless existent_lfs_objects.any?
+          all_existing_objects += existent_lfs_objects.pluck(:oid)
+        end
 
-        not_linked_lfs_objects = existent_lfs_objects.where.not(id: project.all_lfs_objects)
-        project.all_lfs_objects << not_linked_lfs_objects
+        log_lfs_link_results(all_existing_objects.count, iterations)
 
-        existent_lfs_objects.pluck(:oid)
+        all_existing_objects
       end
       # rubocop: enable CodeReuse/ActiveRecord
+
+      def log_lfs_link_results(lfs_objects_linked_count, iterations)
+        Gitlab::Import::Logger.info(
+          class: self.class.name,
+          project_id: project.id,
+          project_path: project.full_path,
+          lfs_objects_linked_count: lfs_objects_linked_count,
+          iterations: iterations)
+      end
     end
   end
 end
```