diff options
author | Douwe Maan <douwe@gitlab.com> | 2018-04-25 08:22:43 +0000 |
---|---|---|
committer | Mayra Cabrera <mcabrera@gitlab.com> | 2018-04-30 15:00:14 -0500 |
commit | 9cf4e4734192c7234a97f1a7f472eed3ce7a2448 (patch) | |
tree | ce101a37bfc462c0c232c9f8cb89c67bda4c974b | |
parent | e71351d4f463715fccd80ddbcb4dade67e80f34b (diff) | |
download | gitlab-ce-9cf4e4734192c7234a97f1a7f472eed3ce7a2448.tar.gz |
Merge branch 'security-45689-fix-archive-cache-bug' into 'security-10-7'
Serve archive requests with the correct file in all cases (10.7)
See merge request gitlab/gitlabhq!2376
-rw-r--r-- | app/services/repository_archive_clean_up_service.rb | 5 | ||||
-rw-r--r-- | changelogs/unreleased/security-45689-fix-archive-cache-bug.yml | 5 | ||||
-rw-r--r-- | lib/gitlab/git/repository.rb | 51 | ||||
-rw-r--r-- | spec/lib/gitlab/git/repository_spec.rb | 87 | ||||
-rw-r--r-- | spec/services/repository_archive_clean_up_service_spec.rb | 68 |
5 files changed, 144 insertions, 72 deletions
diff --git a/app/services/repository_archive_clean_up_service.rb b/app/services/repository_archive_clean_up_service.rb index 9a88459b511..ba7be4b3f89 100644 --- a/app/services/repository_archive_clean_up_service.rb +++ b/app/services/repository_archive_clean_up_service.rb @@ -20,11 +20,12 @@ class RepositoryArchiveCleanUpService private def clean_up_old_archives - run(%W(find #{path} -not -path #{path} -type f \( -name \*.tar -o -name \*.bz2 -o -name \*.tar.gz -o -name \*.zip \) -maxdepth 2 -mmin +#{mmin} -delete)) + run(%W(find #{path} -mindepth 1 -maxdepth 3 -type f \( -name \*.tar -o -name \*.bz2 -o -name \*.tar.gz -o -name \*.zip \) -mmin +#{mmin} -delete)) end def clean_up_empty_directories - run(%W(find #{path} -not -path #{path} -type d -empty -name \*.git -maxdepth 1 -delete)) + run(%W(find #{path} -mindepth 2 -maxdepth 2 -type d -empty -delete)) + run(%W(find #{path} -mindepth 1 -maxdepth 1 -type d -empty -delete)) end def run(cmd) diff --git a/changelogs/unreleased/security-45689-fix-archive-cache-bug.yml b/changelogs/unreleased/security-45689-fix-archive-cache-bug.yml new file mode 100644 index 00000000000..0103a7fc430 --- /dev/null +++ b/changelogs/unreleased/security-45689-fix-archive-cache-bug.yml @@ -0,0 +1,5 @@ +--- +title: Serve archive requests with the correct file in all cases +merge_request: +author: +type: security diff --git a/lib/gitlab/git/repository.rb b/lib/gitlab/git/repository.rb index 0ad2d385051..de0044fc149 100644 --- a/lib/gitlab/git/repository.rb +++ b/lib/gitlab/git/repository.rb @@ -391,18 +391,6 @@ module Gitlab nil end - def archive_prefix(ref, sha, append_sha:) - append_sha = (ref != sha) if append_sha.nil? - - project_name = self.name.chomp('.git') - formatted_ref = ref.tr('/', '-') - - prefix_segments = [project_name, formatted_ref] - prefix_segments << sha if append_sha - - prefix_segments.join('-') - end - def archive_metadata(ref, storage_path, format = "tar.gz", append_sha:) ref ||= root_ref commit = Gitlab::Git::Commit.find(self, ref) @@ -413,12 +401,44 @@ module Gitlab { 'RepoPath' => path, 'ArchivePrefix' => prefix, - 'ArchivePath' => archive_file_path(prefix, storage_path, format), + 'ArchivePath' => archive_file_path(storage_path, commit.id, prefix, format), 'CommitId' => commit.id } end - def archive_file_path(name, storage_path, format = "tar.gz") + # This is both the filename of the archive (missing the extension) and the + # name of the top-level member of the archive under which all files go + # + # FIXME: The generated prefix is incorrect for projects with hashed + # storage enabled + def archive_prefix(ref, sha, append_sha:) + append_sha = (ref != sha) if append_sha.nil? + + project_name = self.name.chomp('.git') + formatted_ref = ref.tr('/', '-') + + prefix_segments = [project_name, formatted_ref] + prefix_segments << sha if append_sha + + prefix_segments.join('-') + end + private :archive_prefix + + # The full path on disk where the archive should be stored. This is used + # to cache the archive between requests. + # + # The path is a global namespace, so needs to be globally unique. This is + # achieved by including `gl_repository` in the path. + # + # Archives relating to a particular ref when the SHA is not present in the + # filename must be invalidated when the ref is updated to point to a new + # SHA. This is achieved by including the SHA in the path. + # + # As this is a full path on disk, it is not "cloud native". This should + # be resolved by either removing the cache, or moving the implementation + # into Gitaly and removing the ArchivePath parameter from the git-archive + # senddata response. + def archive_file_path(storage_path, sha, name, format = "tar.gz") # Build file path return nil unless name @@ -436,8 +456,9 @@ module Gitlab end file_name = "#{name}.#{extension}" - File.join(storage_path, self.name, file_name) + File.join(storage_path, self.gl_repository, sha, file_name) end + private :archive_file_path # Return repo size in megabytes def size diff --git a/spec/lib/gitlab/git/repository_spec.rb b/spec/lib/gitlab/git/repository_spec.rb index da1a6229ccf..9924641f829 100644 --- a/spec/lib/gitlab/git/repository_spec.rb +++ b/spec/lib/gitlab/git/repository_spec.rb @@ -234,59 +234,72 @@ describe Gitlab::Git::Repository, seed_helper: true do it_behaves_like 'wrapping gRPC errors', Gitlab::GitalyClient::RefService, :tag_names end - shared_examples 'archive check' do |extenstion| - it { expect(metadata['ArchivePath']).to match(%r{tmp/gitlab-git-test.git/gitlab-git-test-master-#{SeedRepo::LastCommit::ID}}) } - it { expect(metadata['ArchivePath']).to end_with extenstion } - end + describe '#archive_metadata' do + let(:storage_path) { '/tmp' } + let(:cache_key) { File.join(repository.gl_repository, SeedRepo::LastCommit::ID) } - describe '#archive_prefix' do - let(:project_name) { 'project-name'} + let(:append_sha) { true } + let(:ref) { 'master' } + let(:format) { nil } - before do - expect(repository).to receive(:name).once.and_return(project_name) - end + let(:expected_extension) { 'tar.gz' } + let(:expected_filename) { "#{expected_prefix}.#{expected_extension}" } + let(:expected_path) { File.join(storage_path, cache_key, expected_filename) } + let(:expected_prefix) { "gitlab-git-test-#{ref}-#{SeedRepo::LastCommit::ID}" } - it 'returns parameterised string for a ref containing slashes' do - prefix = repository.archive_prefix('test/branch', 'SHA', append_sha: nil) + subject(:metadata) { repository.archive_metadata(ref, storage_path, format, append_sha: append_sha) } - expect(prefix).to eq("#{project_name}-test-branch-SHA") + it 'sets RepoPath to the repository path' do + expect(metadata['RepoPath']).to eq(repository.path) end - it 'returns correct string for a ref containing dots' do - prefix = repository.archive_prefix('test.branch', 'SHA', append_sha: nil) - - expect(prefix).to eq("#{project_name}-test.branch-SHA") + it 'sets CommitId to the commit SHA' do + expect(metadata['CommitId']).to eq(SeedRepo::LastCommit::ID) end - it 'returns string with sha when append_sha is false' do - prefix = repository.archive_prefix('test.branch', 'SHA', append_sha: false) - - expect(prefix).to eq("#{project_name}-test.branch") + it 'sets ArchivePrefix to the expected prefix' do + expect(metadata['ArchivePrefix']).to eq(expected_prefix) end - end - describe '#archive' do - let(:metadata) { repository.archive_metadata('master', '/tmp', append_sha: true) } + it 'sets ArchivePath to the expected globally-unique path' do + # This is really important from a security perspective. Think carefully + # before changing it: https://gitlab.com/gitlab-org/gitlab-ce/issues/45689 + expect(expected_path).to include(File.join(repository.gl_repository, SeedRepo::LastCommit::ID)) - it_should_behave_like 'archive check', '.tar.gz' - end - - describe '#archive_zip' do - let(:metadata) { repository.archive_metadata('master', '/tmp', 'zip', append_sha: true) } + expect(metadata['ArchivePath']).to eq(expected_path) + end - it_should_behave_like 'archive check', '.zip' - end + context 'append_sha varies archive path and filename' do + where(:append_sha, :ref, :expected_prefix) do + sha = SeedRepo::LastCommit::ID - describe '#archive_bz2' do - let(:metadata) { repository.archive_metadata('master', '/tmp', 'tbz2', append_sha: true) } + true | 'master' | "gitlab-git-test-master-#{sha}" + true | sha | "gitlab-git-test-#{sha}-#{sha}" + false | 'master' | "gitlab-git-test-master" + false | sha | "gitlab-git-test-#{sha}" + nil | 'master' | "gitlab-git-test-master-#{sha}" + nil | sha | "gitlab-git-test-#{sha}" + end - it_should_behave_like 'archive check', '.tar.bz2' - end + with_them do + it { expect(metadata['ArchivePrefix']).to eq(expected_prefix) } + it { expect(metadata['ArchivePath']).to eq(expected_path) } + end + end - describe '#archive_fallback' do - let(:metadata) { repository.archive_metadata('master', '/tmp', 'madeup', append_sha: true) } + context 'format varies archive path and filename' do + where(:format, :expected_extension) do + nil | 'tar.gz' + 'madeup' | 'tar.gz' + 'tbz2' | 'tar.bz2' + 'zip' | 'zip' + end - it_should_behave_like 'archive check', '.tar.gz' + with_them do + it { expect(metadata['ArchivePrefix']).to eq(expected_prefix) } + it { expect(metadata['ArchivePath']).to eq(expected_path) } + end + end end describe '#size' do diff --git a/spec/services/repository_archive_clean_up_service_spec.rb b/spec/services/repository_archive_clean_up_service_spec.rb index 2d7fa3f80f7..ab1c638fc39 100644 --- a/spec/services/repository_archive_clean_up_service_spec.rb +++ b/spec/services/repository_archive_clean_up_service_spec.rb @@ -1,15 +1,47 @@ require 'spec_helper' describe RepositoryArchiveCleanUpService do - describe '#execute' do - subject(:service) { described_class.new } + subject(:service) { described_class.new } + describe '#execute (new archive locations)' do + let(:sha) { "0" * 40 } + + it 'removes outdated archives and directories in a new-style path' do + in_directory_with_files("project-999/#{sha}", %w[tar tar.bz2 tar.gz zip], 3.hours) do |dirname, files| + service.execute + + files.each { |filename| expect(File.exist?(filename)).to be_falsy } + expect(File.directory?(dirname)).to be_falsy + expect(File.directory?(File.dirname(dirname))).to be_falsy + end + end + + it 'does not remove directories when they contain outdated non-archives' do + in_directory_with_files("project-999/#{sha}", %w[tar conf rb], 2.hours) do |dirname, files| + service.execute + + expect(File.directory?(dirname)).to be_truthy + end + end + + it 'does not remove in-date archives in a new-style path' do + in_directory_with_files("project-999/#{sha}", %w[tar tar.bz2 tar.gz zip], 1.hour) do |dirname, files| + service.execute + + files.each { |filename| expect(File.exist?(filename)).to be_truthy } + end + end + end + + describe '#execute (legacy archive locations)' do context 'when the downloads directory does not exist' do it 'does not remove any archives' do path = '/invalid/path/' stub_repository_downloads_path(path) + allow(File).to receive(:directory?).and_call_original expect(File).to receive(:directory?).with(path).and_return(false) + expect(service).not_to receive(:clean_up_old_archives) expect(service).not_to receive(:clean_up_empty_directories) @@ -19,7 +51,7 @@ describe RepositoryArchiveCleanUpService do context 'when the downloads directory exists' do shared_examples 'invalid archive files' do |dirname, extensions, mtime| - it 'does not remove files and directoy' do + it 'does not remove files and directory' do in_directory_with_files(dirname, extensions, mtime) do |dir, files| service.execute @@ -43,7 +75,7 @@ describe RepositoryArchiveCleanUpService do end context 'with files older than 2 hours inside invalid directories' do - it_behaves_like 'invalid archive files', 'john_doe/sample.git', %w[conf rb tar tar.gz], 2.hours + it_behaves_like 'invalid archive files', 'john/doe/sample.git', %w[conf rb tar tar.gz], 2.hours end context 'with files newer than 2 hours that matches valid archive extensions' do @@ -58,24 +90,24 @@ describe RepositoryArchiveCleanUpService do it_behaves_like 'invalid archive files', 'sample.git', %w[conf rb tar tar.gz], 1.hour end end + end - def in_directory_with_files(dirname, extensions, mtime) - Dir.mktmpdir do |tmpdir| - stub_repository_downloads_path(tmpdir) - dir = File.join(tmpdir, dirname) - files = create_temporary_files(dir, extensions, mtime) + def in_directory_with_files(dirname, extensions, mtime) + Dir.mktmpdir do |tmpdir| + stub_repository_downloads_path(tmpdir) + dir = File.join(tmpdir, dirname) + files = create_temporary_files(dir, extensions, mtime) - yield(dir, files) - end + yield(dir, files) end + end - def stub_repository_downloads_path(path) - allow(Gitlab.config.gitlab).to receive(:repository_downloads_path).and_return(path) - end + def stub_repository_downloads_path(path) + allow(Gitlab.config.gitlab).to receive(:repository_downloads_path).and_return(path) + end - def create_temporary_files(dir, extensions, mtime) - FileUtils.mkdir_p(dir) - FileUtils.touch(extensions.map { |ext| File.join(dir, "sample.#{ext}") }, mtime: Time.now - mtime) - end + def create_temporary_files(dir, extensions, mtime) + FileUtils.mkdir_p(dir) + FileUtils.touch(extensions.map { |ext| File.join(dir, "sample.#{ext}") }, mtime: Time.now - mtime) end end |