summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDouwe Maan <douwe@gitlab.com>2018-04-25 08:22:43 +0000
committerMayra Cabrera <mcabrera@gitlab.com>2018-04-25 08:44:18 -0500
commitfbf317c25cfaf390592dd87f86f39fd303c0859e (patch)
tree1e473d7bb4e0399d51309354bfe8ec72b17c4d3b
parent598247237a8a3f614203b9c270bc59fb48e8687f (diff)
downloadgitlab-ce-fbf317c25cfaf390592dd87f86f39fd303c0859e.tar.gz
Merge branch 'security-45689-fix-archive-cache-bug' into 'security-10-7'
Serve archive requests with the correct file in all cases (10.7) See merge request gitlab/gitlabhq!2376
-rw-r--r--app/services/repository_archive_clean_up_service.rb5
-rw-r--r--changelogs/unreleased/security-45689-fix-archive-cache-bug.yml5
-rw-r--r--lib/gitlab/git/repository.rb51
-rw-r--r--spec/lib/gitlab/git/repository_spec.rb87
-rw-r--r--spec/services/repository_archive_clean_up_service_spec.rb68
5 files changed, 144 insertions, 72 deletions
diff --git a/app/services/repository_archive_clean_up_service.rb b/app/services/repository_archive_clean_up_service.rb
index aa84d36a206..ba9eefccd13 100644
--- a/app/services/repository_archive_clean_up_service.rb
+++ b/app/services/repository_archive_clean_up_service.rb
@@ -20,11 +20,12 @@ class RepositoryArchiveCleanUpService
private
def clean_up_old_archives
- run(%W(find #{path} -not -path #{path} -type f \( -name \*.tar -o -name \*.bz2 -o -name \*.tar.gz -o -name \*.zip \) -maxdepth 2 -mmin +#{mmin} -delete))
+ run(%W(find #{path} -mindepth 1 -maxdepth 3 -type f \( -name \*.tar -o -name \*.bz2 -o -name \*.tar.gz -o -name \*.zip \) -mmin +#{mmin} -delete))
end
def clean_up_empty_directories
- run(%W(find #{path} -not -path #{path} -type d -empty -name \*.git -maxdepth 1 -delete))
+ run(%W(find #{path} -mindepth 2 -maxdepth 2 -type d -empty -delete))
+ run(%W(find #{path} -mindepth 1 -maxdepth 1 -type d -empty -delete))
end
def run(cmd)
diff --git a/changelogs/unreleased/security-45689-fix-archive-cache-bug.yml b/changelogs/unreleased/security-45689-fix-archive-cache-bug.yml
new file mode 100644
index 00000000000..0103a7fc430
--- /dev/null
+++ b/changelogs/unreleased/security-45689-fix-archive-cache-bug.yml
@@ -0,0 +1,5 @@
+---
+title: Serve archive requests with the correct file in all cases
+merge_request:
+author:
+type: security
diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb
index aafa9b19440..5af26fb95ed 100644
--- a/lib/gitlab/git/repository.rb
+++ b/lib/gitlab/git/repository.rb
@@ -397,18 +397,6 @@ module Gitlab
nil
end
- def archive_prefix(ref, sha, append_sha:)
- append_sha = (ref != sha) if append_sha.nil?
-
- project_name = self.name.chomp('.git')
- formatted_ref = ref.tr('/', '-')
-
- prefix_segments = [project_name, formatted_ref]
- prefix_segments << sha if append_sha
-
- prefix_segments.join('-')
- end
-
def archive_metadata(ref, storage_path, format = "tar.gz", append_sha:)
ref ||= root_ref
commit = Gitlab::Git::Commit.find(self, ref)
@@ -419,12 +407,44 @@ module Gitlab
{
'RepoPath' => path,
'ArchivePrefix' => prefix,
- 'ArchivePath' => archive_file_path(prefix, storage_path, format),
+ 'ArchivePath' => archive_file_path(storage_path, commit.id, prefix, format),
'CommitId' => commit.id
}
end
- def archive_file_path(name, storage_path, format = "tar.gz")
+ # This is both the filename of the archive (missing the extension) and the
+ # name of the top-level member of the archive under which all files go
+ #
+ # FIXME: The generated prefix is incorrect for projects with hashed
+ # storage enabled
+ def archive_prefix(ref, sha, append_sha:)
+ append_sha = (ref != sha) if append_sha.nil?
+
+ project_name = self.name.chomp('.git')
+ formatted_ref = ref.tr('/', '-')
+
+ prefix_segments = [project_name, formatted_ref]
+ prefix_segments << sha if append_sha
+
+ prefix_segments.join('-')
+ end
+ private :archive_prefix
+
+ # The full path on disk where the archive should be stored. This is used
+ # to cache the archive between requests.
+ #
+ # The path is a global namespace, so needs to be globally unique. This is
+ # achieved by including `gl_repository` in the path.
+ #
+ # Archives relating to a particular ref when the SHA is not present in the
+ # filename must be invalidated when the ref is updated to point to a new
+ # SHA. This is achieved by including the SHA in the path.
+ #
+ # As this is a full path on disk, it is not "cloud native". This should
+ # be resolved by either removing the cache, or moving the implementation
+ # into Gitaly and removing the ArchivePath parameter from the git-archive
+ # senddata response.
+ def archive_file_path(storage_path, sha, name, format = "tar.gz")
# Build file path
return nil unless name
@@ -442,8 +462,9 @@ module Gitlab
end
file_name = "#{name}.#{extension}"
- File.join(storage_path, self.name, file_name)
+ File.join(storage_path, self.gl_repository, sha, file_name)
end
+ private :archive_file_path
# Return repo size in megabytes
def size
diff --git a/spec/lib/gitlab/git/repository_spec.rb b/spec/lib/gitlab/git/repository_spec.rb
index 022edd5798d..62fcbc041b3 100644
--- a/spec/lib/gitlab/git/repository_spec.rb
+++ b/spec/lib/gitlab/git/repository_spec.rb
@@ -234,59 +234,72 @@ describe Gitlab::Git::Repository, seed_helper: true do
it_behaves_like 'wrapping gRPC errors', Gitlab::GitalyClient::RefService, :tag_names
end
- shared_examples 'archive check' do |extenstion|
- it { expect(metadata['ArchivePath']).to match(%r{tmp/gitlab-git-test.git/gitlab-git-test-master-#{SeedRepo::LastCommit::ID}}) }
- it { expect(metadata['ArchivePath']).to end_with extenstion }
- end
+ describe '#archive_metadata' do
+ let(:storage_path) { '/tmp' }
+ let(:cache_key) { File.join(repository.gl_repository, SeedRepo::LastCommit::ID) }
- describe '#archive_prefix' do
- let(:project_name) { 'project-name'}
+ let(:append_sha) { true }
+ let(:ref) { 'master' }
+ let(:format) { nil }
- before do
- expect(repository).to receive(:name).once.and_return(project_name)
- end
+ let(:expected_extension) { 'tar.gz' }
+ let(:expected_filename) { "#{expected_prefix}.#{expected_extension}" }
+ let(:expected_path) { File.join(storage_path, cache_key, expected_filename) }
+ let(:expected_prefix) { "gitlab-git-test-#{ref}-#{SeedRepo::LastCommit::ID}" }
- it 'returns parameterised string for a ref containing slashes' do
- prefix = repository.archive_prefix('test/branch', 'SHA', append_sha: nil)
+ subject(:metadata) { repository.archive_metadata(ref, storage_path, format, append_sha: append_sha) }
- expect(prefix).to eq("#{project_name}-test-branch-SHA")
+ it 'sets RepoPath to the repository path' do
+ expect(metadata['RepoPath']).to eq(repository.path)
end
- it 'returns correct string for a ref containing dots' do
- prefix = repository.archive_prefix('test.branch', 'SHA', append_sha: nil)
-
- expect(prefix).to eq("#{project_name}-test.branch-SHA")
+ it 'sets CommitId to the commit SHA' do
+ expect(metadata['CommitId']).to eq(SeedRepo::LastCommit::ID)
end
- it 'returns string with sha when append_sha is false' do
- prefix = repository.archive_prefix('test.branch', 'SHA', append_sha: false)
-
- expect(prefix).to eq("#{project_name}-test.branch")
+ it 'sets ArchivePrefix to the expected prefix' do
+ expect(metadata['ArchivePrefix']).to eq(expected_prefix)
end
- end
- describe '#archive' do
- let(:metadata) { repository.archive_metadata('master', '/tmp', append_sha: true) }
+ it 'sets ArchivePath to the expected globally-unique path' do
+ # This is really important from a security perspective. Think carefully
+ # before changing it: https://gitlab.com/gitlab-org/gitlab-ce/issues/45689
+ expect(expected_path).to include(File.join(repository.gl_repository, SeedRepo::LastCommit::ID))
- it_should_behave_like 'archive check', '.tar.gz'
- end
-
- describe '#archive_zip' do
- let(:metadata) { repository.archive_metadata('master', '/tmp', 'zip', append_sha: true) }
+ expect(metadata['ArchivePath']).to eq(expected_path)
+ end
- it_should_behave_like 'archive check', '.zip'
- end
+ context 'append_sha varies archive path and filename' do
+ where(:append_sha, :ref, :expected_prefix) do
+ sha = SeedRepo::LastCommit::ID
- describe '#archive_bz2' do
- let(:metadata) { repository.archive_metadata('master', '/tmp', 'tbz2', append_sha: true) }
+ true | 'master' | "gitlab-git-test-master-#{sha}"
+ true | sha | "gitlab-git-test-#{sha}-#{sha}"
+ false | 'master' | "gitlab-git-test-master"
+ false | sha | "gitlab-git-test-#{sha}"
+ nil | 'master' | "gitlab-git-test-master-#{sha}"
+ nil | sha | "gitlab-git-test-#{sha}"
+ end
- it_should_behave_like 'archive check', '.tar.bz2'
- end
+ with_them do
+ it { expect(metadata['ArchivePrefix']).to eq(expected_prefix) }
+ it { expect(metadata['ArchivePath']).to eq(expected_path) }
+ end
+ end
- describe '#archive_fallback' do
- let(:metadata) { repository.archive_metadata('master', '/tmp', 'madeup', append_sha: true) }
+ context 'format varies archive path and filename' do
+ where(:format, :expected_extension) do
+ nil | 'tar.gz'
+ 'madeup' | 'tar.gz'
+ 'tbz2' | 'tar.bz2'
+ 'zip' | 'zip'
+ end
- it_should_behave_like 'archive check', '.tar.gz'
+ with_them do
+ it { expect(metadata['ArchivePrefix']).to eq(expected_prefix) }
+ it { expect(metadata['ArchivePath']).to eq(expected_path) }
+ end
+ end
end
describe '#size' do
diff --git a/spec/services/repository_archive_clean_up_service_spec.rb b/spec/services/repository_archive_clean_up_service_spec.rb
index 2d7fa3f80f7..ab1c638fc39 100644
--- a/spec/services/repository_archive_clean_up_service_spec.rb
+++ b/spec/services/repository_archive_clean_up_service_spec.rb
@@ -1,15 +1,47 @@
require 'spec_helper'
describe RepositoryArchiveCleanUpService do
- describe '#execute' do
- subject(:service) { described_class.new }
+ subject(:service) { described_class.new }
+ describe '#execute (new archive locations)' do
+ let(:sha) { "0" * 40 }
+
+ it 'removes outdated archives and directories in a new-style path' do
+ in_directory_with_files("project-999/#{sha}", %w[tar tar.bz2 tar.gz zip], 3.hours) do |dirname, files|
+ service.execute
+
+ files.each { |filename| expect(File.exist?(filename)).to be_falsy }
+ expect(File.directory?(dirname)).to be_falsy
+ expect(File.directory?(File.dirname(dirname))).to be_falsy
+ end
+ end
+
+ it 'does not remove directories when they contain outdated non-archives' do
+ in_directory_with_files("project-999/#{sha}", %w[tar conf rb], 2.hours) do |dirname, files|
+ service.execute
+
+ expect(File.directory?(dirname)).to be_truthy
+ end
+ end
+
+ it 'does not remove in-date archives in a new-style path' do
+ in_directory_with_files("project-999/#{sha}", %w[tar tar.bz2 tar.gz zip], 1.hour) do |dirname, files|
+ service.execute
+
+ files.each { |filename| expect(File.exist?(filename)).to be_truthy }
+ end
+ end
+ end
+
+ describe '#execute (legacy archive locations)' do
context 'when the downloads directory does not exist' do
it 'does not remove any archives' do
path = '/invalid/path/'
stub_repository_downloads_path(path)
+ allow(File).to receive(:directory?).and_call_original
expect(File).to receive(:directory?).with(path).and_return(false)
+
expect(service).not_to receive(:clean_up_old_archives)
expect(service).not_to receive(:clean_up_empty_directories)
@@ -19,7 +51,7 @@ describe RepositoryArchiveCleanUpService do
context 'when the downloads directory exists' do
shared_examples 'invalid archive files' do |dirname, extensions, mtime|
- it 'does not remove files and directoy' do
+ it 'does not remove files and directory' do
in_directory_with_files(dirname, extensions, mtime) do |dir, files|
service.execute
@@ -43,7 +75,7 @@ describe RepositoryArchiveCleanUpService do
end
context 'with files older than 2 hours inside invalid directories' do
- it_behaves_like 'invalid archive files', 'john_doe/sample.git', %w[conf rb tar tar.gz], 2.hours
+ it_behaves_like 'invalid archive files', 'john/doe/sample.git', %w[conf rb tar tar.gz], 2.hours
end
context 'with files newer than 2 hours that matches valid archive extensions' do
@@ -58,24 +90,24 @@ describe RepositoryArchiveCleanUpService do
it_behaves_like 'invalid archive files', 'sample.git', %w[conf rb tar tar.gz], 1.hour
end
end
+ end
- def in_directory_with_files(dirname, extensions, mtime)
- Dir.mktmpdir do |tmpdir|
- stub_repository_downloads_path(tmpdir)
- dir = File.join(tmpdir, dirname)
- files = create_temporary_files(dir, extensions, mtime)
+ def in_directory_with_files(dirname, extensions, mtime)
+ Dir.mktmpdir do |tmpdir|
+ stub_repository_downloads_path(tmpdir)
+ dir = File.join(tmpdir, dirname)
+ files = create_temporary_files(dir, extensions, mtime)
- yield(dir, files)
- end
+ yield(dir, files)
end
+ end
- def stub_repository_downloads_path(path)
- allow(Gitlab.config.gitlab).to receive(:repository_downloads_path).and_return(path)
- end
+ def stub_repository_downloads_path(path)
+ allow(Gitlab.config.gitlab).to receive(:repository_downloads_path).and_return(path)
+ end
- def create_temporary_files(dir, extensions, mtime)
- FileUtils.mkdir_p(dir)
- FileUtils.touch(extensions.map { |ext| File.join(dir, "sample.#{ext}") }, mtime: Time.now - mtime)
- end
+ def create_temporary_files(dir, extensions, mtime)
+ FileUtils.mkdir_p(dir)
+ FileUtils.touch(extensions.map { |ext| File.join(dir, "sample.#{ext}") }, mtime: Time.now - mtime)
end
end