summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--GITALY_SERVER_VERSION2
-rw-r--r--app/models/merge_request_diff.rb4
-rw-r--r--app/models/note_diff_file.rb4
-rw-r--r--app/services/projects/cleanup_service.rb47
-rw-r--r--changelogs/unreleased/30093-apply-bfg-object-map-to-database.yml5
-rw-r--r--doc/user/project/repository/reducing_the_repo_size_using_git.md6
-rw-r--r--lib/gitlab/discussions_diff/highlight_cache.rb13
-rw-r--r--lib/gitlab/git/repository_cleaner.rb4
-rw-r--r--lib/gitlab/gitaly_client/cleanup_service.rb33
-rw-r--r--spec/lib/gitlab/discussions_diff/highlight_cache_spec.rb110
-rw-r--r--spec/lib/gitlab/git/repository_cleaner_spec.rb71
-rw-r--r--spec/lib/gitlab/gitaly_client/cleanup_service_spec.rb10
-rw-r--r--spec/models/note_diff_file_spec.rb27
-rw-r--r--spec/services/projects/cleanup_service_spec.rb89
14 files changed, 302 insertions, 123 deletions
diff --git a/GITALY_SERVER_VERSION b/GITALY_SERVER_VERSION
index 32b7211cb61..7d47e599800 100644
--- a/GITALY_SERVER_VERSION
+++ b/GITALY_SERVER_VERSION
@@ -1 +1 @@
-1.40.0
+1.41.0
diff --git a/app/models/merge_request_diff.rb b/app/models/merge_request_diff.rb
index 5f5d92bc2f0..f45bd0e03de 100644
--- a/app/models/merge_request_diff.rb
+++ b/app/models/merge_request_diff.rb
@@ -51,6 +51,10 @@ class MergeRequestDiff < ApplicationRecord
joins(:merge_request_diff_commits).where(merge_request_diff_commits: { sha: sha }).reorder(nil)
end
+ scope :by_project_id, -> (project_id) do
+ joins(:merge_request).where(merge_requests: { target_project_id: project_id })
+ end
+
scope :recent, -> { order(id: :desc).limit(100) }
scope :files_in_database, -> { where(stored_externally: [false, nil]) }
diff --git a/app/models/note_diff_file.rb b/app/models/note_diff_file.rb
index 9afb94c869a..fcc9e2b3fd8 100644
--- a/app/models/note_diff_file.rb
+++ b/app/models/note_diff_file.rb
@@ -7,6 +7,10 @@ class NoteDiffFile < ApplicationRecord
joins(:diff_note).where("resolved_at IS NULL OR noteable_type = 'Commit'")
end
+ scope :referencing_sha, -> (oids, project_id:) do
+ joins(:diff_note).where(notes: { project_id: project_id, commit_id: oids })
+ end
+
delegate :original_position, :project, to: :diff_note
belongs_to :diff_note, inverse_of: :note_diff_file
diff --git a/app/services/projects/cleanup_service.rb b/app/services/projects/cleanup_service.rb
index 12103ea34b5..5972bfd4071 100644
--- a/app/services/projects/cleanup_service.rb
+++ b/app/services/projects/cleanup_service.rb
@@ -18,9 +18,6 @@ module Projects
# per rewritten object, with the old and new SHAs space-separated. It can be
# used to update or remove content that references the objects that BFG has
# altered
- #
- # Currently, only the project repository is modified by this service, but we
- # may wish to modify other data sources in the future.
def execute
apply_bfg_object_map!
@@ -41,10 +38,52 @@ module Projects
raise NoUploadError unless project.bfg_object_map.exists?
project.bfg_object_map.open do |io|
- repository_cleaner.apply_bfg_object_map(io)
+ repository_cleaner.apply_bfg_object_map_stream(io) do |response|
+ cleanup_diffs(response)
+ end
+ end
+ end
+
+ def cleanup_diffs(response)
+ old_commit_shas = extract_old_commit_shas(response.entries)
+
+ ActiveRecord::Base.transaction do
+ cleanup_merge_request_diffs(old_commit_shas)
+ cleanup_note_diff_files(old_commit_shas)
end
end
+ def extract_old_commit_shas(batch)
+ batch.lazy.select { |entry| entry.type == :COMMIT }.map(&:old_oid).force
+ end
+
+ def cleanup_merge_request_diffs(old_commit_shas)
+ merge_request_diffs = MergeRequestDiff
+ .by_project_id(project.id)
+ .by_commit_sha(old_commit_shas)
+
+ # It's important to run the ActiveRecord callbacks here
+ merge_request_diffs.destroy_all # rubocop:disable Cop/DestroyAll
+
+ # TODO: ensure the highlight cache is removed immediately. It's too hard
+ # to calculate the Redis keys at present.
+ #
+ # https://gitlab.com/gitlab-org/gitlab-ce/issues/61115
+ end
+
+ def cleanup_note_diff_files(old_commit_shas)
+ # Pluck the IDs instead of running the query twice to ensure we clear the
+ # cache for exactly the note diffs we remove
+ ids = NoteDiffFile
+ .referencing_sha(old_commit_shas, project_id: project.id)
+ .pluck_primary_key
+
+ NoteDiffFile.id_in(ids).delete_all
+
+ # A highlighted version of the diff is stored in redis. Remove it now.
+ Gitlab::DiscussionsDiff::HighlightCache.clear_multiple(ids)
+ end
+
def repository_cleaner
@repository_cleaner ||= Gitlab::Git::RepositoryCleaner.new(repository.raw)
end
diff --git a/changelogs/unreleased/30093-apply-bfg-object-map-to-database.yml b/changelogs/unreleased/30093-apply-bfg-object-map-to-database.yml
new file mode 100644
index 00000000000..ec851dfcacc
--- /dev/null
+++ b/changelogs/unreleased/30093-apply-bfg-object-map-to-database.yml
@@ -0,0 +1,5 @@
+---
+title: Remove cleaned up OIDs from database and cache
+merge_request: 26555
+author:
+type: added
diff --git a/doc/user/project/repository/reducing_the_repo_size_using_git.md b/doc/user/project/repository/reducing_the_repo_size_using_git.md
index 672567a8d7d..2339759ecc8 100644
--- a/doc/user/project/repository/reducing_the_repo_size_using_git.md
+++ b/doc/user/project/repository/reducing_the_repo_size_using_git.md
@@ -98,6 +98,12 @@ up its own internal state, maximizing the space saved.
`git gc` against the repository. You will receive an email once it has
completed.
+This process will remove some copies of the rewritten commits from GitLab's
+cache and database, but there are still numerous gaps in coverage - at present,
+some of the copies may persist indefinitely. [Clearing the instance cache]
+(../../../administration/raketasks/maintenance.md#clear-redis-cache) may help to
+remove some of them, but it should not be depended on for security purposes!
+
## Using `git filter-branch`
1. Navigate to your repository:
diff --git a/lib/gitlab/discussions_diff/highlight_cache.rb b/lib/gitlab/discussions_diff/highlight_cache.rb
index 270cfb89488..369c6b87fb4 100644
--- a/lib/gitlab/discussions_diff/highlight_cache.rb
+++ b/lib/gitlab/discussions_diff/highlight_cache.rb
@@ -52,6 +52,19 @@ module Gitlab
end
end
+ # Clears multiple cache keys at once.
+ #
+ # raw_keys - An Array of unique cache keys, without namespaces.
+ #
+ # It returns the number of cache keys cleared. Ex.: 42
+ def clear_multiple(raw_keys)
+ return [] if raw_keys.empty?
+
+ keys = raw_keys.map { |id| cache_key_for(id) }
+
+ Redis::Cache.with { |redis| redis.del(keys) }
+ end
+
def cache_key_for(raw_key)
"#{cache_key_prefix}:#{raw_key}"
end
diff --git a/lib/gitlab/git/repository_cleaner.rb b/lib/gitlab/git/repository_cleaner.rb
index 2d1d8435cf3..9dd0ddfb44b 100644
--- a/lib/gitlab/git/repository_cleaner.rb
+++ b/lib/gitlab/git/repository_cleaner.rb
@@ -12,9 +12,9 @@ module Gitlab
@repository = repository
end
- def apply_bfg_object_map(io)
+ def apply_bfg_object_map_stream(io, &blk)
wrapped_gitaly_errors do
- gitaly_cleanup_client.apply_bfg_object_map(io)
+ gitaly_cleanup_client.apply_bfg_object_map_stream(io, &blk)
end
end
diff --git a/lib/gitlab/gitaly_client/cleanup_service.rb b/lib/gitlab/gitaly_client/cleanup_service.rb
index 3e8d6a773ca..a56bc35f6d7 100644
--- a/lib/gitlab/gitaly_client/cleanup_service.rb
+++ b/lib/gitlab/gitaly_client/cleanup_service.rb
@@ -12,25 +12,32 @@ module Gitlab
@storage = repository.storage
end
- def apply_bfg_object_map(io)
- first_request = Gitaly::ApplyBfgObjectMapRequest.new(repository: gitaly_repo)
+ def apply_bfg_object_map_stream(io, &blk)
+ responses = GitalyClient.call(
+ storage,
+ :cleanup_service,
+ :apply_bfg_object_map_stream,
+ build_object_map_enum(io),
+ timeout: GitalyClient.no_timeout
+ )
+
+ responses.each(&blk)
+ end
+
+ private
- enum = Enumerator.new do |y|
- y.yield first_request
+ def build_object_map_enum(io)
+ Enumerator.new do |y|
+ # First request. For simplicity, doesn't include any object map data
+ y << Gitaly::ApplyBfgObjectMapStreamRequest.new(repository: gitaly_repo)
+ # Now stream the BFG object map file to gitaly in chunks
while data = io.read(RepositoryService::MAX_MSG_SIZE)
- y.yield Gitaly::ApplyBfgObjectMapRequest.new(object_map: data)
+ y << Gitaly::ApplyBfgObjectMapStreamRequest.new(object_map: data)
+
break if io&.eof?
end
end
-
- GitalyClient.call(
- storage,
- :cleanup_service,
- :apply_bfg_object_map,
- enum,
- timeout: GitalyClient.no_timeout
- )
end
end
end
diff --git a/spec/lib/gitlab/discussions_diff/highlight_cache_spec.rb b/spec/lib/gitlab/discussions_diff/highlight_cache_spec.rb
index fe26ebb8796..15ee8c40b55 100644
--- a/spec/lib/gitlab/discussions_diff/highlight_cache_spec.rb
+++ b/spec/lib/gitlab/discussions_diff/highlight_cache_spec.rb
@@ -3,31 +3,32 @@
require 'spec_helper'
describe Gitlab::DiscussionsDiff::HighlightCache, :clean_gitlab_redis_cache do
+ def fake_file(offset)
+ {
+ text: 'foo',
+ type: 'new',
+ index: 2 + offset,
+ old_pos: 10 + offset,
+ new_pos: 11 + offset,
+ line_code: 'xpto',
+ rich_text: '<blips>blops</blips>'
+ }
+ end
+
+ let(:mapping) do
+ {
+ 3 => [
+ fake_file(0),
+ fake_file(1)
+ ],
+ 4 => [
+ fake_file(2)
+ ]
+ }
+ end
+
describe '#write_multiple' do
it 'sets multiple keys serializing content as JSON' do
- mapping = {
- 3 => [
- {
- text: 'foo',
- type: 'new',
- index: 2,
- old_pos: 10,
- new_pos: 11,
- line_code: 'xpto',
- rich_text: '<blips>blops</blips>'
- },
- {
- text: 'foo',
- type: 'new',
- index: 3,
- old_pos: 11,
- new_pos: 12,
- line_code: 'xpto',
- rich_text: '<blops>blips</blops>'
- }
- ]
- }
-
described_class.write_multiple(mapping)
mapping.each do |key, value|
@@ -41,53 +42,16 @@ describe Gitlab::DiscussionsDiff::HighlightCache, :clean_gitlab_redis_cache do
describe '#read_multiple' do
it 'reads multiple keys and serializes content into Gitlab::Diff::Line objects' do
- mapping = {
- 3 => [
- {
- text: 'foo',
- type: 'new',
- index: 2,
- old_pos: 11,
- new_pos: 12,
- line_code: 'xpto',
- rich_text: '<blips>blops</blips>'
- },
- {
- text: 'foo',
- type: 'new',
- index: 3,
- old_pos: 10,
- new_pos: 11,
- line_code: 'xpto',
- rich_text: '<blips>blops</blips>'
- }
- ]
- }
-
described_class.write_multiple(mapping)
found = described_class.read_multiple(mapping.keys)
- expect(found.size).to eq(1)
+ expect(found.size).to eq(2)
expect(found.first.size).to eq(2)
expect(found.first).to all(be_a(Gitlab::Diff::Line))
end
it 'returns nil when cached key is not found' do
- mapping = {
- 3 => [
- {
- text: 'foo',
- type: 'new',
- index: 2,
- old_pos: 11,
- new_pos: 12,
- line_code: 'xpto',
- rich_text: '<blips>blops</blips>'
- }
- ]
- }
-
described_class.write_multiple(mapping)
found = described_class.read_multiple([2, 3])
@@ -95,8 +59,30 @@ describe Gitlab::DiscussionsDiff::HighlightCache, :clean_gitlab_redis_cache do
expect(found.size).to eq(2)
expect(found.first).to eq(nil)
- expect(found.second.size).to eq(1)
+ expect(found.second.size).to eq(2)
expect(found.second).to all(be_a(Gitlab::Diff::Line))
end
end
+
+ describe '#clear_multiple' do
+ it 'removes all named keys' do
+ described_class.write_multiple(mapping)
+
+ described_class.clear_multiple(mapping.keys)
+
+ expect(described_class.read_multiple(mapping.keys)).to all(be_nil)
+ end
+
+ it 'only removed named keys' do
+ to_clear, to_leave = mapping.keys
+
+ described_class.write_multiple(mapping)
+ described_class.clear_multiple([to_clear])
+
+ cleared, left = described_class.read_multiple([to_clear, to_leave])
+
+ expect(cleared).to be_nil
+ expect(left).to all(be_a(Gitlab::Diff::Line))
+ end
+ end
end
diff --git a/spec/lib/gitlab/git/repository_cleaner_spec.rb b/spec/lib/gitlab/git/repository_cleaner_spec.rb
index 6602f22843f..7bba0107e58 100644
--- a/spec/lib/gitlab/git/repository_cleaner_spec.rb
+++ b/spec/lib/gitlab/git/repository_cleaner_spec.rb
@@ -6,55 +6,62 @@ describe Gitlab::Git::RepositoryCleaner do
let(:project) { create(:project, :repository) }
let(:repository) { project.repository }
let(:head_sha) { repository.head_commit.id }
- let(:object_map_data) { "#{head_sha} #{'0' * 40}" }
+ let(:object_map_data) { "#{head_sha} #{Gitlab::Git::BLANK_SHA}" }
- subject(:cleaner) { described_class.new(repository.raw) }
+ let(:clean_refs) { %W[refs/environments/1 refs/merge-requests/1 refs/keep-around/#{head_sha}] }
+ let(:keep_refs) { %w[refs/heads/_keep refs/tags/_keep] }
- describe '#apply_bfg_object_map' do
- let(:clean_refs) { %W[refs/environments/1 refs/merge-requests/1 refs/keep-around/#{head_sha}] }
- let(:keep_refs) { %w[refs/heads/_keep refs/tags/_keep] }
+ subject(:cleaner) { described_class.new(repository.raw) }
+ shared_examples_for '#apply_bfg_object_map_stream' do
before do
(clean_refs + keep_refs).each { |ref| repository.create_ref(head_sha, ref) }
end
- context 'from StringIO' do
- let(:object_map) { StringIO.new(object_map_data) }
+ it 'removes internal references' do
+ entries = []
- it 'removes internal references' do
- cleaner.apply_bfg_object_map(object_map)
+ cleaner.apply_bfg_object_map_stream(object_map) do |rsp|
+ entries.concat(rsp.entries)
+ end
- aggregate_failures do
- clean_refs.each { |ref| expect(repository.ref_exists?(ref)).to be_falsy }
- keep_refs.each { |ref| expect(repository.ref_exists?(ref)).to be_truthy }
- end
+ aggregate_failures do
+ clean_refs.each { |ref| expect(repository.ref_exists?(ref)).to be(false) }
+ keep_refs.each { |ref| expect(repository.ref_exists?(ref)).to be(true) }
+
+ expect(entries).to contain_exactly(
+ Gitaly::ApplyBfgObjectMapStreamResponse::Entry.new(
+ type: :COMMIT,
+ old_oid: head_sha,
+ new_oid: Gitlab::Git::BLANK_SHA
+ )
+ )
end
end
+ end
- context 'from Gitlab::HttpIO' do
- let(:url) { 'http://example.com/bfg_object_map.txt' }
- let(:tempfile) { Tempfile.new }
- let(:object_map) { Gitlab::HttpIO.new(url, object_map_data.size) }
+ describe '#apply_bfg_object_map_stream (from StringIO)' do
+ let(:object_map) { StringIO.new(object_map_data) }
- around do |example|
- tempfile.write(object_map_data)
- tempfile.close
+ include_examples '#apply_bfg_object_map_stream'
+ end
- example.run
- ensure
- tempfile.unlink
- end
+ describe '#apply_bfg_object_map_stream (from Gitlab::HttpIO)' do
+ let(:url) { 'http://example.com/bfg_object_map.txt' }
+ let(:tempfile) { Tempfile.new }
+ let(:object_map) { Gitlab::HttpIO.new(url, object_map_data.size) }
- it 'removes internal references' do
- stub_remote_url_200(url, tempfile.path)
+ around do |example|
+ tempfile.write(object_map_data)
+ tempfile.close
- cleaner.apply_bfg_object_map(object_map)
+ stub_remote_url_200(url, tempfile.path)
- aggregate_failures do
- clean_refs.each { |ref| expect(repository.ref_exists?(ref)).to be_falsy }
- keep_refs.each { |ref| expect(repository.ref_exists?(ref)).to be_truthy }
- end
- end
+ example.run
+ ensure
+ tempfile.unlink
end
+
+ include_examples '#apply_bfg_object_map_stream'
end
end
diff --git a/spec/lib/gitlab/gitaly_client/cleanup_service_spec.rb b/spec/lib/gitlab/gitaly_client/cleanup_service_spec.rb
index 369deff732a..c42332dc27b 100644
--- a/spec/lib/gitlab/gitaly_client/cleanup_service_spec.rb
+++ b/spec/lib/gitlab/gitaly_client/cleanup_service_spec.rb
@@ -6,14 +6,14 @@ describe Gitlab::GitalyClient::CleanupService do
let(:relative_path) { project.disk_path + '.git' }
let(:client) { described_class.new(project.repository) }
- describe '#apply_bfg_object_map' do
- it 'sends an apply_bfg_object_map message' do
+ describe '#apply_bfg_object_map_stream' do
+ it 'sends an apply_bfg_object_map_stream message' do
expect_any_instance_of(Gitaly::CleanupService::Stub)
- .to receive(:apply_bfg_object_map)
+ .to receive(:apply_bfg_object_map_stream)
.with(kind_of(Enumerator), kind_of(Hash))
- .and_return(double)
+ .and_return([])
- client.apply_bfg_object_map(StringIO.new)
+ client.apply_bfg_object_map_stream(StringIO.new)
end
end
end
diff --git a/spec/models/note_diff_file_spec.rb b/spec/models/note_diff_file_spec.rb
index 99eeac8d778..b15bedd257e 100644
--- a/spec/models/note_diff_file_spec.rb
+++ b/spec/models/note_diff_file_spec.rb
@@ -10,4 +10,31 @@ describe NoteDiffFile do
describe 'validations' do
it { is_expected.to validate_presence_of(:diff_note) }
end
+
+ describe '.referencing_sha' do
+ let!(:diff_note) { create(:diff_note_on_commit) }
+
+ let(:note_diff_file) { diff_note.note_diff_file }
+ let(:project) { diff_note.project }
+
+ it 'finds note diff files by project and sha' do
+ found = described_class.referencing_sha(diff_note.commit_id, project_id: project.id)
+
+ expect(found).to contain_exactly(note_diff_file)
+ end
+
+ it 'excludes note diff files with the wrong project' do
+ other_project = create(:project)
+
+ found = described_class.referencing_sha(diff_note.commit_id, project_id: other_project.id)
+
+ expect(found).to be_empty
+ end
+
+ it 'excludes note diff files with the wrong sha' do
+ found = described_class.referencing_sha(Gitlab::Git::BLANK_SHA, project_id: project.id)
+
+ expect(found).to be_empty
+ end
+ end
end
diff --git a/spec/services/projects/cleanup_service_spec.rb b/spec/services/projects/cleanup_service_spec.rb
index 29eabc86327..5c246854eb7 100644
--- a/spec/services/projects/cleanup_service_spec.rb
+++ b/spec/services/projects/cleanup_service_spec.rb
@@ -6,13 +6,13 @@ describe Projects::CleanupService do
let(:project) { create(:project, :repository, bfg_object_map: fixture_file_upload('spec/fixtures/bfg_object_map.txt')) }
let(:object_map) { project.bfg_object_map }
+ let(:cleaner) { service.__send__(:repository_cleaner) }
+
subject(:service) { described_class.new(project) }
describe '#execute' do
- it 'runs the apply_bfg_object_map gitaly RPC' do
- expect_next_instance_of(Gitlab::Git::RepositoryCleaner) do |cleaner|
- expect(cleaner).to receive(:apply_bfg_object_map).with(kind_of(IO))
- end
+ it 'runs the apply_bfg_object_map_stream gitaly RPC' do
+ expect(cleaner).to receive(:apply_bfg_object_map_stream).with(kind_of(IO))
service.execute
end
@@ -37,10 +37,91 @@ describe Projects::CleanupService do
expect(object_map.exists?).to be_falsy
end
+ context 'with a tainted merge request diff' do
+ let(:merge_request) { create(:merge_request, source_project: project, target_project: project) }
+ let(:diff) { merge_request.merge_request_diff }
+ let(:entry) { build_entry(diff.commits.first.id) }
+
+ before do
+ allow(cleaner)
+ .to receive(:apply_bfg_object_map_stream)
+ .and_yield(Gitaly::ApplyBfgObjectMapStreamResponse.new(entries: [entry]))
+ end
+
+ it 'removes the tainted commit from the database' do
+ service.execute
+
+ expect(MergeRequestDiff.exists?(diff.id)).to be_falsy
+ end
+
+ it 'ignores non-commit responses from Gitaly' do
+ entry.type = :UNKNOWN
+
+ service.execute
+
+ expect(MergeRequestDiff.exists?(diff.id)).to be_truthy
+ end
+ end
+
+ context 'with a tainted diff note' do
+ let(:diff_note) { create(:diff_note_on_commit, project: project) }
+ let(:note_diff_file) { diff_note.note_diff_file }
+ let(:entry) { build_entry(diff_note.commit_id) }
+
+ let(:highlight_cache) { Gitlab::DiscussionsDiff::HighlightCache }
+ let(:cache_id) { note_diff_file.id }
+
+ before do
+ allow(cleaner)
+ .to receive(:apply_bfg_object_map_stream)
+ .and_yield(Gitaly::ApplyBfgObjectMapStreamResponse.new(entries: [entry]))
+ end
+
+ it 'removes the tainted commit from the database' do
+ service.execute
+
+ expect(NoteDiffFile.exists?(note_diff_file.id)).to be_falsy
+ end
+
+ it 'removes the highlight cache from redis' do
+ write_cache(highlight_cache, cache_id, [{}])
+
+ expect(read_cache(highlight_cache, cache_id)).not_to be_nil
+
+ service.execute
+
+ expect(read_cache(highlight_cache, cache_id)).to be_nil
+ end
+
+ it 'ignores non-commit responses from Gitaly' do
+ entry.type = :UNKNOWN
+
+ service.execute
+
+ expect(NoteDiffFile.exists?(note_diff_file.id)).to be_truthy
+ end
+ end
+
it 'raises an error if no object map can be found' do
object_map.remove!
expect { service.execute }.to raise_error(described_class::NoUploadError)
end
end
+
+ def build_entry(old_oid)
+ Gitaly::ApplyBfgObjectMapStreamResponse::Entry.new(
+ type: :COMMIT,
+ old_oid: old_oid,
+ new_oid: Gitlab::Git::BLANK_SHA
+ )
+ end
+
+ def read_cache(cache, key)
+ cache.read_multiple([key]).first
+ end
+
+ def write_cache(cache, key, value)
+ cache.write_multiple(key => value)
+ end
end