summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRubén Dávila <ruben@gitlab.com>2018-06-06 13:49:38 -0500
committerRubén Dávila <ruben@gitlab.com>2018-06-06 13:49:38 -0500
commit29818672e12fe779034920933098f47269fdef16 (patch)
treeb71983e6a7909a81294e14be777fe7b8bd9e102b
parent90a5bcc5c9bcf90b4eb17ace072212335b85ea29 (diff)
downloadgitlab-ce-rd-45062-further-improve-lfs-integrity-check-performance.tar.gz
Improve performance used to check LFS integrityrd-45062-further-improve-lfs-integrity-check-performance
Instead of using `git rev-list` and loading each Blob to inspect its content, we're now relying on the usage of `git diff` and `git cat-file`. This has the main advantage that we're reusing the same technique used for other existing checks, hence all the Blobs are cached.
-rw-r--r--changelogs/unreleased/rd-45062-further-improve-lfs-integrity-check-performance.yml5
-rw-r--r--lib/extracts_path.rb3
-rw-r--r--lib/gitlab/checks/change_access.rb2
-rw-r--r--lib/gitlab/checks/lfs_integrity.rb7
-rw-r--r--lib/gitlab/git/blob.rb25
-rw-r--r--lib/gitlab/git/raw_diff_change.rb4
-rw-r--r--spec/lib/gitlab/checks/lfs_integrity_spec.rb17
-rw-r--r--spec/lib/gitlab/git/blob_spec.rb61
8 files changed, 105 insertions, 19 deletions
diff --git a/changelogs/unreleased/rd-45062-further-improve-lfs-integrity-check-performance.yml b/changelogs/unreleased/rd-45062-further-improve-lfs-integrity-check-performance.yml
new file mode 100644
index 00000000000..a502cc1d698
--- /dev/null
+++ b/changelogs/unreleased/rd-45062-further-improve-lfs-integrity-check-performance.yml
@@ -0,0 +1,5 @@
+---
+title: Improve performance of LFS integrity check
+merge_request: 19071
+author:
+type: performance
diff --git a/lib/extracts_path.rb b/lib/extracts_path.rb
index a9b04c183ad..48e3d81e860 100644
--- a/lib/extracts_path.rb
+++ b/lib/extracts_path.rb
@@ -138,8 +138,7 @@ module ExtractsPath
end
def lfs_blob_ids
- blob_ids = tree.blobs.map(&:id)
- @lfs_blob_ids = Gitlab::Git::Blob.batch_lfs_pointers(@project.repository, blob_ids).map(&:id) # rubocop:disable Gitlab/ModuleWithInstanceVariables
+ @lfs_blob_ids = Gitlab::Git::Blob.lfs_pointers_from_tree(@project.repository, tree).map(&:id) # rubocop:disable Gitlab/ModuleWithInstanceVariables
end
private
diff --git a/lib/gitlab/checks/change_access.rb b/lib/gitlab/checks/change_access.rb
index 51ba09aa129..e2326fa882c 100644
--- a/lib/gitlab/checks/change_access.rb
+++ b/lib/gitlab/checks/change_access.rb
@@ -169,7 +169,7 @@ module Gitlab
end
def lfs_objects_exist_check
- lfs_check = Checks::LfsIntegrity.new(project, newrev)
+ lfs_check = Checks::LfsIntegrity.new(project, oldrev, newrev)
if lfs_check.objects_missing?
raise GitAccess::UnauthorizedError, ERROR_MESSAGES[:lfs_objects_missing]
diff --git a/lib/gitlab/checks/lfs_integrity.rb b/lib/gitlab/checks/lfs_integrity.rb
index f0e5773ec3c..603cb36d85d 100644
--- a/lib/gitlab/checks/lfs_integrity.rb
+++ b/lib/gitlab/checks/lfs_integrity.rb
@@ -1,17 +1,16 @@
module Gitlab
module Checks
class LfsIntegrity
- REV_LIST_OBJECT_LIMIT = 2_000
-
- def initialize(project, newrev)
+ def initialize(project, oldrev, newrev)
@project = project
+ @oldrev = oldrev
@newrev = newrev
end
def objects_missing?
return false unless @newrev && @project.lfs_enabled?
- new_lfs_pointers = Gitlab::Git::LfsChanges.new(@project.repository, @newrev).new_pointers(object_limit: REV_LIST_OBJECT_LIMIT)
+ new_lfs_pointers = Gitlab::Git::Blob.lfs_pointers_between(@project.repository, @oldrev, @newrev)
return false unless new_lfs_pointers.present?
diff --git a/lib/gitlab/git/blob.rb b/lib/gitlab/git/blob.rb
index 156d077a69c..f90f90fbe26 100644
--- a/lib/gitlab/git/blob.rb
+++ b/lib/gitlab/git/blob.rb
@@ -85,6 +85,31 @@ module Gitlab
end
end
+ # Find LFS blobs old_rev and new_rev
+ # old_rev and new_rev are commit ID's
+ # Returns array of Gitlab::Git::Blob
+ # Does not guarantee blob data will be set
+ def lfs_pointers_between(repository, old_rev, new_rev)
+ revs_with_paths = repository.raw_changes_between(old_rev, new_rev)
+ .reject { |c| c.deleted? }
+ .select { |c| size_could_be_lfs?(c.blob_size) }
+ .map! { |c| [new_rev, c.new_path] }
+
+ return [] if revs_with_paths.blank?
+
+ batch(repository, revs_with_paths, blob_size_limit: LFS_POINTER_MAX_SIZE)
+ .select(&:lfs_pointer?)
+ end
+
+ def lfs_pointers_from_tree(repository, tree)
+ revs_with_paths = tree.blobs.map! { |blob| [tree.sha, blob.path] }
+
+ return [] if revs_with_paths.blank?
+
+ batch(repository, revs_with_paths, blob_size_limit: LFS_POINTER_MAX_SIZE)
+ .select { |blob| size_could_be_lfs?(blob.size) && blob.lfs_pointer? }
+ end
+
def binary?(data)
EncodingHelper.detect_libgit2_binary?(data)
end
diff --git a/lib/gitlab/git/raw_diff_change.rb b/lib/gitlab/git/raw_diff_change.rb
index 98de9328071..a2815c5603d 100644
--- a/lib/gitlab/git/raw_diff_change.rb
+++ b/lib/gitlab/git/raw_diff_change.rb
@@ -17,6 +17,10 @@ module Gitlab
end
end
+ def deleted?
+ operation == :deleted
+ end
+
private
# Input data has the following format:
diff --git a/spec/lib/gitlab/checks/lfs_integrity_spec.rb b/spec/lib/gitlab/checks/lfs_integrity_spec.rb
index ec22e3a198e..9b6524b518c 100644
--- a/spec/lib/gitlab/checks/lfs_integrity_spec.rb
+++ b/spec/lib/gitlab/checks/lfs_integrity_spec.rb
@@ -5,24 +5,17 @@ describe Gitlab::Checks::LfsIntegrity do
let(:project) { create(:project, :repository) }
let(:repository) { project.repository }
- let(:newrev) do
- operations = Gitlab::GitalyClient::StorageSettings.allow_disk_access do
- BareRepoOperations.new(repository.path)
- end
-
- # Create a commit not pointed at by any ref to emulate being in the
- # pre-receive hook so that `--not --all` returns some objects
- operations.commit_tree('8856a329dd38ca86dfb9ce5aa58a16d88cc119bd', "New LFS objects")
- end
+ let(:oldrev) { Gitlab::Git::EMPTY_TREE_ID }
+ let(:newrev) { TestEnv::BRANCH_SHA['lfs'] }
- subject { described_class.new(project, newrev) }
+ subject { described_class.new(project, oldrev, newrev) }
describe '#objects_missing?' do
let(:blob_object) { repository.blob_at_branch('lfs', 'files/lfs/lfs_object.iso') }
context 'with LFS not enabled' do
it 'skips integrity check' do
- expect_any_instance_of(Gitlab::Git::LfsChanges).not_to receive(:new_pointers)
+ expect(Gitlab::Git::Blob).not_to receive(:lfs_pointers_between)
subject.objects_missing?
end
@@ -37,7 +30,7 @@ describe Gitlab::Checks::LfsIntegrity do
let(:newrev) { nil }
it 'skips integrity check' do
- expect_any_instance_of(Gitlab::Git::LfsChanges).not_to receive(:new_pointers)
+ expect(Gitlab::Git::Blob).not_to receive(:lfs_pointers_between)
expect(subject.objects_missing?).to be_falsey
end
diff --git a/spec/lib/gitlab/git/blob_spec.rb b/spec/lib/gitlab/git/blob_spec.rb
index 94eaf86ef80..796dabf7d0e 100644
--- a/spec/lib/gitlab/git/blob_spec.rb
+++ b/spec/lib/gitlab/git/blob_spec.rb
@@ -277,6 +277,67 @@ describe Gitlab::Git::Blob, seed_helper: true do
end
end
+ describe '.lfs_pointers_between' do
+ let(:repository) { create(:project, :repository).repository }
+ let(:old_rev) { Gitlab::Git::EMPTY_TREE_ID }
+ let(:lfs_rev) { TestEnv::BRANCH_SHA['lfs'] }
+ let(:non_lfs_rev) { TestEnv::BRANCH_SHA['feature'] }
+
+ context 'with LFS objects' do
+ it 'returns a list of Gitlab::Git::Blob' do
+ blobs = described_class.lfs_pointers_between(repository, old_rev, lfs_rev)
+
+ expect(blobs.count).to eq(1)
+ expect(blobs).to all( be_a(Gitlab::Git::Blob) )
+ expect(blobs).to be_an(Array)
+ end
+
+ context 'when changes consist of a single LFS pointer' do
+ let(:parent_commit) { '5f923865dde3436854e9ceb9cdb7815618d4e849' }
+ let(:commit_with_lfs_pointer) { '048721d90c449b244b7b4c53a9186b04330174ec' }
+
+ it 'returns the single LFS pointer found' do
+ blobs = described_class.lfs_pointers_between(repository, parent_commit, commit_with_lfs_pointer)
+
+ expect(blobs.count).to eq(1)
+ expect(blobs).to all( be_a(Gitlab::Git::Blob) )
+ end
+ end
+ end
+
+ context 'without LFS objects' do
+ it 'returns an empty Array' do
+ blobs = described_class.lfs_pointers_between(repository, old_rev, non_lfs_rev)
+
+ expect(blobs).to eq([])
+ end
+ end
+ end
+
+ describe '.lfs_pointers_from_tree' do
+ let(:repository) { create(:project, :repository).repository }
+ let(:tree_with_lfs_objects) { repository.tree('lfs', 'files/lfs') }
+ let(:tree_without_lfs_objects) { repository.tree('feature', '/') }
+
+ context 'with a tree with LFS objects' do
+ it 'returns a list of Gitlab::Git::Blob' do
+ blobs = described_class.lfs_pointers_from_tree(repository, tree_with_lfs_objects)
+
+ expect(blobs.count).to eq(1)
+ expect(blobs).to all( be_a(Gitlab::Git::Blob) )
+ expect(blobs).to be_an(Array)
+ end
+ end
+
+ context 'with a tree without LFS objects' do
+ it 'returns an empty Array' do
+ blobs = described_class.lfs_pointers_from_tree(repository, tree_without_lfs_objects)
+
+ expect(blobs).to eq([])
+ end
+ end
+ end
+
describe '.batch_lfs_pointers' do
let(:tree_object) { repository.rugged.rev_parse('master^{tree}') }