summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Stan Hu <stanhu@gmail.com> 2019-08-27 06:07:38 -0700
committer: Stan Hu <stanhu@gmail.com> 2019-08-27 15:52:33 -0700
commit: 2022e6799bcbf119ea80145e4993ffdb7bb108e3 (patch)
tree: 19fb831f799ad9353751f7f51b0a5f309f4eead8
parent: a58f4f00cf7cc8b4755d0d20094cbbd547d31d2b (diff)
download: gitlab-ce-sh-lfs-object-batches.tar.gz
Makes LFS object linker process OIDs in batches (branch: sh-lfs-object-batches)
During a project import, `LfsLinkService` attempts to link `LfsObjects` that have not already been associated with a project. It's possible for a large repo to have thousands of OIDs, which can cause long database query and parsing times. By processing a batch of 1000 at a time, we can reduce that time at the expense of a few more SQL queries. Closes https://gitlab.com/gitlab-org/gitlab-ce/issues/66274
-rw-r--r-- app/models/lfs_object.rb 1
-rw-r--r-- app/services/projects/lfs_pointers/lfs_link_service.rb 29
-rw-r--r-- changelogs/unreleased/sh-lfs-object-batches.yml 5
-rw-r--r-- spec/services/projects/lfs_pointers/lfs_link_service_spec.rb 18
4 files changed, 48 insertions, 5 deletions
diff --git a/app/models/lfs_object.rb b/app/models/lfs_object.rb
index 79a376ff0fd..40695a97d97 100644
--- a/app/models/lfs_object.rb
+++ b/app/models/lfs_object.rb
@@ -2,6 +2,7 @@
class LfsObject < ApplicationRecord
include AfterCommitQueue
+ include EachBatch
include ObjectStorage::BackgroundMove
has_many :lfs_objects_projects, dependent: :destroy # rubocop:disable Cop/ActiveRecordDependent
diff --git a/app/services/projects/lfs_pointers/lfs_link_service.rb b/app/services/projects/lfs_pointers/lfs_link_service.rb
index e3c956250f0..38de2af9c1e 100644
--- a/app/services/projects/lfs_pointers/lfs_link_service.rb
+++ b/app/services/projects/lfs_pointers/lfs_link_service.rb
@@ -4,6 +4,8 @@
module Projects
module LfsPointers
class LfsLinkService < BaseService
+ BATCH_SIZE = 1000
+
# Accept an array of oids to link
#
# Returns an array with the oid of the existent lfs objects
@@ -18,16 +20,33 @@ module Projects
# rubocop: disable CodeReuse/ActiveRecord
def link_existing_lfs_objects(oids)
- existent_lfs_objects = LfsObject.where(oid: oids)
+ all_existing_objects = []
+ iterations = 0
+
+ LfsObject.where(oid: oids).each_batch(of: BATCH_SIZE) do |existent_lfs_objects|
+ next unless existent_lfs_objects.any?
+
+ iterations += 1
+ not_linked_lfs_objects = existent_lfs_objects.where.not(id: project.all_lfs_objects)
+ project.all_lfs_objects << not_linked_lfs_objects
- return [] unless existent_lfs_objects.any?
+ all_existing_objects += existent_lfs_objects.pluck(:oid)
+ end
- not_linked_lfs_objects = existent_lfs_objects.where.not(id: project.all_lfs_objects)
- project.all_lfs_objects << not_linked_lfs_objects
+ log_lfs_link_results(all_existing_objects.count, iterations)
- existent_lfs_objects.pluck(:oid)
+ all_existing_objects
end
# rubocop: enable CodeReuse/ActiveRecord
+
+ def log_lfs_link_results(lfs_objects_linked_count, iterations)
+ Gitlab::Import::Logger.info(
+ class: self.class.name,
+ project_id: project.id,
+ project_path: project.full_path,
+ lfs_objects_linked_count: lfs_objects_linked_count,
+ iterations: iterations)
+ end
end
end
end
diff --git a/changelogs/unreleased/sh-lfs-object-batches.yml b/changelogs/unreleased/sh-lfs-object-batches.yml
new file mode 100644
index 00000000000..09043e286be
--- /dev/null
+++ b/changelogs/unreleased/sh-lfs-object-batches.yml
@@ -0,0 +1,5 @@
+---
+title: Makes LFS object linker process OIDs in batches
+merge_request: 32268
+author:
+type: performance
diff --git a/spec/services/projects/lfs_pointers/lfs_link_service_spec.rb b/spec/services/projects/lfs_pointers/lfs_link_service_spec.rb
index 849601c4a63..66233787d3a 100644
--- a/spec/services/projects/lfs_pointers/lfs_link_service_spec.rb
+++ b/spec/services/projects/lfs_pointers/lfs_link_service_spec.rb
@@ -30,5 +30,23 @@ describe Projects::LfsPointers::LfsLinkService do
expect(subject.execute(new_oid_list.keys)).to eq linked
end
+
+ it 'links in batches' do
+ stub_const("#{described_class}::BATCH_SIZE", 3)
+
+ expect(Gitlab::Import::Logger)
+ .to receive(:info)
+ .with(class: described_class.name,
+ project_id: project.id,
+ project_path: project.full_path,
+ lfs_objects_linked_count: 7,
+ iterations: 3)
+
+ lfs_objects = create_list(:lfs_object, 7)
+ linked = subject.execute(lfs_objects.pluck(:oid))
+
+ expect(project.all_lfs_objects.count).to eq 9
+ expect(linked.size).to eq 7
+ end
end
end