summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGitLab Bot <gitlab-bot@gitlab.com>2019-11-03 03:06:20 +0000
committerGitLab Bot <gitlab-bot@gitlab.com>2019-11-03 03:06:20 +0000
commitd4aaea5efabc948b1a04a62a032d18f34d1a3f31 (patch)
treeaa7d77e37830afbade6f154326d09643354debb5
parent8423ed74e617cfba4a94f31a03c2ae0d799bf82f (diff)
downloadgitlab-ce-d4aaea5efabc948b1a04a62a032d18f34d1a3f31.tar.gz
Add latest changes from gitlab-org/gitlab@master
-rw-r--r--changelogs/unreleased/bvl-robust-gpg-homedir-cleanup.yml5
-rw-r--r--lib/gitlab/gpg.rb49
-rw-r--r--spec/lib/gitlab/gpg_spec.rb90
3 files changed, 141 insertions, 3 deletions
diff --git a/changelogs/unreleased/bvl-robust-gpg-homedir-cleanup.yml b/changelogs/unreleased/bvl-robust-gpg-homedir-cleanup.yml
new file mode 100644
index 00000000000..766e9f16968
--- /dev/null
+++ b/changelogs/unreleased/bvl-robust-gpg-homedir-cleanup.yml
@@ -0,0 +1,5 @@
+---
+title: Improve handling of gpg-agent processes
+merge_request: 19311
+author:
+type: changed
diff --git a/lib/gitlab/gpg.rb b/lib/gitlab/gpg.rb
index 32f61b1d65c..1dce26efc65 100644
--- a/lib/gitlab/gpg.rb
+++ b/lib/gitlab/gpg.rb
@@ -4,6 +4,10 @@ module Gitlab
module Gpg
extend self
+ CleanupError = Class.new(StandardError)
+ BG_CLEANUP_RUNTIME_S = 2
+ FG_CLEANUP_RUNTIME_S = 0.5
+
MUTEX = Mutex.new
module CurrentKeyChain
@@ -94,16 +98,55 @@ module Gitlab
previous_dir = current_home_dir
tmp_dir = Dir.mktmpdir
GPGME::Engine.home_dir = tmp_dir
+ tmp_keychains_created.increment
+
yield
ensure
- # Ignore any errors when removing the tmp directory, as we may run into a
+ GPGME::Engine.home_dir = previous_dir
+
+ begin
+ cleanup_tmp_dir(tmp_dir)
+ rescue CleanupError => e
+ # This means we left a GPG-agent process hanging. Logging the problem in
+ # sentry will make this more visible.
+ Gitlab::Sentry.track_exception(e,
+ issue_url: 'https://gitlab.com/gitlab-org/gitlab/issues/20918',
+ extra: { tmp_dir: tmp_dir })
+ end
+
+ tmp_keychains_removed.increment unless File.exist?(tmp_dir)
+ end
+
+ def cleanup_tmp_dir(tmp_dir)
+ return FileUtils.remove_entry(tmp_dir, true) if Feature.disabled?(:gpg_cleanup_retries)
+
+ # Retry when removing the tmp directory failed, as we may run into a
# race condition:
# The `gpg-agent` agent process may clean up some files as well while
# `FileUtils.remove_entry` is iterating the directory and removing all
# its contained files and directories recursively, which could raise an
# error.
- FileUtils.remove_entry(tmp_dir, true)
- GPGME::Engine.home_dir = previous_dir
+ # Failing to remove the tmp directory could leave the `gpg-agent` process
+ # running forever.
+ Retriable.retriable(max_elapsed_time: cleanup_time, base_interval: 0.1) do
+ FileUtils.remove_entry(tmp_dir) if File.exist?(tmp_dir)
+ end
+ rescue => e
+ raise CleanupError, e
+ end
+
+ def cleanup_time
+ Sidekiq.server? ? BG_CLEANUP_RUNTIME_S : FG_CLEANUP_RUNTIME_S
+ end
+
+ def tmp_keychains_created
+ @tmp_keychains_created ||= Gitlab::Metrics.counter(:gpg_tmp_keychains_created_total,
+ 'The number of temporary GPG keychains created')
+ end
+
+ def tmp_keychains_removed
+ @tmp_keychains_removed ||= Gitlab::Metrics.counter(:gpg_tmp_keychains_removed_total,
+ 'The number of temporary GPG keychains removed')
end
end
end
diff --git a/spec/lib/gitlab/gpg_spec.rb b/spec/lib/gitlab/gpg_spec.rb
index 77d318c9b23..8ba7ea4d237 100644
--- a/spec/lib/gitlab/gpg_spec.rb
+++ b/spec/lib/gitlab/gpg_spec.rb
@@ -139,6 +139,96 @@ describe Gitlab::Gpg do
end
end.not_to raise_error
end
+
+ it 'keeps track of created and removed keychains in counters' do
+ created = Gitlab::Metrics.counter(:gpg_tmp_keychains_created_total, 'The number of temporary GPG keychains')
+ removed = Gitlab::Metrics.counter(:gpg_tmp_keychains_removed_total, 'The number of temporary GPG keychains')
+
+ initial_created = created.get
+ initial_removed = removed.get
+
+ described_class.using_tmp_keychain do
+ expect(created.get).to eq(initial_created + 1)
+ expect(removed.get).to eq(initial_removed)
+ end
+
+ expect(removed.get).to eq(initial_removed + 1)
+ end
+
+ it 'cleans up the tmp directory after finishing' do
+ tmp_directory = nil
+
+ described_class.using_tmp_keychain do
+ tmp_directory = described_class.current_home_dir
+ expect(File.exist?(tmp_directory)).to be true
+ end
+
+ expect(tmp_directory).not_to be_nil
+ expect(File.exist?(tmp_directory)).to be false
+ end
+
+ it 'does not fail if the homedir was deleted while running' do
+ expect do
+ described_class.using_tmp_keychain do
+ FileUtils.remove_entry(described_class.current_home_dir)
+ end
+ end.not_to raise_error
+ end
+
+ shared_examples 'multiple deletion attempts of the tmp-dir' do |seconds|
+ let(:tmp_dir) do
+ tmp_dir = Dir.mktmpdir
+ allow(Dir).to receive(:mktmpdir).and_return(tmp_dir)
+ tmp_dir
+ end
+
+ before do
+ # Stub all the other calls for `remove_entry`
+ allow(FileUtils).to receive(:remove_entry).with(any_args).and_call_original
+ end
+
+ it "tries for #{seconds}" do
+ expect(Retriable).to receive(:retriable).with(a_hash_including(max_elapsed_time: seconds))
+
+ described_class.using_tmp_keychain {}
+ end
+
+ it 'tries at least 2 times to remove the tmp dir before raising', :aggregate_failures do
+ expect(Retriable).to receive(:sleep).at_least(2).times
+ expect(FileUtils).to receive(:remove_entry).with(tmp_dir).at_least(2).times.and_raise('Deletion failed')
+
+ expect { described_class.using_tmp_keychain { } }.to raise_error(described_class::CleanupError)
+ end
+
+ it 'does not attempt multiple times when the deletion succeeds' do
+ expect(Retriable).to receive(:sleep).once
+ expect(FileUtils).to receive(:remove_entry).with(tmp_dir).once.and_raise('Deletion failed')
+ expect(FileUtils).to receive(:remove_entry).with(tmp_dir).and_call_original
+
+ expect { described_class.using_tmp_keychain { } }.not_to raise_error
+
+ expect(File.exist?(tmp_dir)).to be false
+ end
+
+ it 'does not retry when the feature flag is disabled' do
+ stub_feature_flags(gpg_cleanup_retries: false)
+
+ expect(FileUtils).to receive(:remove_entry).with(tmp_dir, true).and_call_original
+ expect(Retriable).not_to receive(:retriable)
+
+ described_class.using_tmp_keychain {}
+ end
+ end
+
+ it_behaves_like 'multiple deletion attempts of the tmp-dir', described_class::FG_CLEANUP_RUNTIME_S
+
+ context 'when running in Sidekiq' do
+ before do
+ allow(Sidekiq).to receive(:server?).and_return(true)
+ end
+
+ it_behaves_like 'multiple deletion attempts of the tmp-dir', described_class::BG_CLEANUP_RUNTIME_S
+ end
end
end