From 6a66e4a1f116ff98d8f9b8dffa3beed62d74e0b3 Mon Sep 17 00:00:00 2001 From: Dylan Griffith Date: Mon, 26 Nov 2018 14:54:44 +0100 Subject: Don't remove failed install pods We want to keep failed install pods around so that it is easier to debug why a failure occured. With this change we also need to ensure that we remove a previous pod with the same name before installing so that re-install does not fail. Another change here is that we no longer need to catch errors from delete_pod! in CheckInstallationProgressService as we now catch the ResourceNotFoundError in Helm::Api. The catch statement in CheckInstallationProgressService was also probably too broad before and should have been narrowed down simply to ResourceNotFoundError. --- .../check_installation_progress_service.rb | 11 ++------ .../51792-dont-delete-failed-install-pods.yml | 5 ++++ lib/gitlab/kubernetes/helm/api.rb | 6 +++++ spec/lib/gitlab/kubernetes/helm/api_spec.rb | 29 ++++++++++++++++++++-- .../check_installation_progress_service_spec.rb | 22 ++++++---------- 5 files changed, 47 insertions(+), 26 deletions(-) create mode 100644 changelogs/unreleased/51792-dont-delete-failed-install-pods.yml diff --git a/app/services/clusters/applications/check_installation_progress_service.rb b/app/services/clusters/applications/check_installation_progress_service.rb index ca0f7b30053..6794580e1e8 100644 --- a/app/services/clusters/applications/check_installation_progress_service.rb +++ b/app/services/clusters/applications/check_installation_progress_service.rb @@ -29,17 +29,13 @@ module Clusters end def on_failed - app.make_errored!('Installation failed') - ensure - remove_installation_pod + app.make_errored!("Installation failed. Check pod logs for #{install_command.pod_name} for more details.") end def check_timeout if timeouted? begin - app.make_errored!('Installation timed out') - ensure - remove_installation_pod + app.make_errored!("Installation timed out. Check pod logs for #{install_command.pod_name} for more details.") end else ClusterWaitForAppInstallationWorker.perform_in( @@ -53,9 +49,6 @@ module Clusters def remove_installation_pod helm_api.delete_pod!(install_command.pod_name) - rescue => e - Rails.logger.error("Kubernetes error: #{e.class.name} #{e.message}") - # no-op end def installation_phase diff --git a/changelogs/unreleased/51792-dont-delete-failed-install-pods.yml b/changelogs/unreleased/51792-dont-delete-failed-install-pods.yml new file mode 100644 index 00000000000..7a900cbb86e --- /dev/null +++ b/changelogs/unreleased/51792-dont-delete-failed-install-pods.yml @@ -0,0 +1,5 @@ +--- +title: Don't remove failed install pods after installing GitLab managed applications +merge_request: 23350 +author: +type: changed diff --git a/lib/gitlab/kubernetes/helm/api.rb b/lib/gitlab/kubernetes/helm/api.rb index fd3d187cbc3..b9903e37f40 100644 --- a/lib/gitlab/kubernetes/helm/api.rb +++ b/lib/gitlab/kubernetes/helm/api.rb @@ -16,12 +16,16 @@ module Gitlab create_cluster_role_binding(command) create_config_map(command) + delete_pod!(command.pod_name) kubeclient.create_pod(command.pod_resource) end def update(command) namespace.ensure_exists! + update_config_map(command) + + delete_pod!(command.pod_name) kubeclient.create_pod(command.pod_resource) end @@ -42,6 +46,8 @@ module Gitlab def delete_pod!(pod_name) kubeclient.delete_pod(pod_name, namespace.name) + rescue ::Kubeclient::ResourceNotFoundError + # no-op end def get_config_map(config_map_name) diff --git a/spec/lib/gitlab/kubernetes/helm/api_spec.rb b/spec/lib/gitlab/kubernetes/helm/api_spec.rb index 8bce7a4cdf5..c7f92cbb143 100644 --- a/spec/lib/gitlab/kubernetes/helm/api_spec.rb +++ b/spec/lib/gitlab/kubernetes/helm/api_spec.rb @@ -40,6 +40,7 @@ describe Gitlab::Kubernetes::Helm::Api do allow(client).to receive(:create_config_map).and_return(nil) allow(client).to receive(:create_service_account).and_return(nil) allow(client).to receive(:create_cluster_role_binding).and_return(nil) + allow(client).to receive(:delete_pod).and_return(nil) allow(namespace).to receive(:ensure_exists!).once end @@ -50,6 +51,13 @@ describe Gitlab::Kubernetes::Helm::Api do subject.install(command) end + it 'removes an existing pod before installing' do + expect(client).to receive(:delete_pod).with('install-app-name', 'gitlab-managed-apps').once.ordered + expect(client).to receive(:create_pod).once.ordered + + subject.install(command) + end + context 'with a ConfigMap' do let(:resource) { Gitlab::Kubernetes::ConfigMap.new(application_name, files).generate } @@ -180,6 +188,7 @@ describe Gitlab::Kubernetes::Helm::Api do allow(client).to receive(:update_config_map).and_return(nil) allow(client).to receive(:create_pod).and_return(nil) + allow(client).to receive(:delete_pod).and_return(nil) end it 'ensures the namespace exists before creating the pod' do @@ -189,6 +198,13 @@ describe Gitlab::Kubernetes::Helm::Api do subject.update(command) end + it 'removes an existing pod before updating' do + expect(client).to receive(:delete_pod).with('upgrade-app-name', 'gitlab-managed-apps').once.ordered + expect(client).to receive(:create_pod).once.ordered + + subject.update(command) + end + it 'updates the config map on kubeclient when one exists' do resource = Gitlab::Kubernetes::ConfigMap.new( application_name, files @@ -224,9 +240,18 @@ describe Gitlab::Kubernetes::Helm::Api do describe '#delete_pod!' do it 'deletes the POD from kubernetes cluster' do - expect(client).to receive(:delete_pod).with(command.pod_name, gitlab_namespace).once + expect(client).to receive(:delete_pod).with('install-app-name', 'gitlab-managed-apps').once - subject.delete_pod!(command.pod_name) + subject.delete_pod!('install-app-name') + end + + context 'when the resource being deleted does not exist' do + it 'catches the error' do + expect(client).to receive(:delete_pod).with('install-app-name', 'gitlab-managed-apps') + .and_raise(Kubeclient::ResourceNotFoundError.new(404, 'Not found', nil)) + + subject.delete_pod!('install-app-name') + end end end diff --git a/spec/services/clusters/applications/check_installation_progress_service_spec.rb b/spec/services/clusters/applications/check_installation_progress_service_spec.rb index ea17f2bb423..9452a9e38fb 100644 --- a/spec/services/clusters/applications/check_installation_progress_service_spec.rb +++ b/spec/services/clusters/applications/check_installation_progress_service_spec.rb @@ -8,14 +8,6 @@ describe Clusters::Applications::CheckInstallationProgressService do let(:phase) { Gitlab::Kubernetes::Pod::UNKNOWN } let(:errors) { nil } - shared_examples 'a terminated installation' do - it 'removes the installation POD' do - expect(service).to receive(:remove_installation_pod).once - - service.execute - end - end - shared_examples 'a not yet terminated installation' do |a_phase| let(:phase) { a_phase } @@ -39,15 +31,13 @@ describe Clusters::Applications::CheckInstallationProgressService do context 'when timeouted' do let(:application) { create(:clusters_applications_helm, :timeouted) } - it_behaves_like 'a terminated installation' - it 'make the application errored' do expect(ClusterWaitForAppInstallationWorker).not_to receive(:perform_in) service.execute expect(application).to be_errored - expect(application.status_reason).to match(/\btimed out\b/) + expect(application.status_reason).to eq("Installation timed out. Check pod logs for install-helm for more details.") end end end @@ -66,7 +56,11 @@ describe Clusters::Applications::CheckInstallationProgressService do expect(service).to receive(:installation_phase).once.and_return(phase) end - it_behaves_like 'a terminated installation' + it 'removes the installation POD' do + expect(service).to receive(:remove_installation_pod).once + + service.execute + end it 'make the application installed' do expect(ClusterWaitForAppInstallationWorker).not_to receive(:perform_in) @@ -86,13 +80,11 @@ describe Clusters::Applications::CheckInstallationProgressService do expect(service).to receive(:installation_phase).once.and_return(phase) end - it_behaves_like 'a terminated installation' - it 'make the application errored' do service.execute expect(application).to be_errored - expect(application.status_reason).to eq("Installation failed") + expect(application.status_reason).to eq("Installation failed. Check pod logs for install-helm for more details.") end end -- cgit v1.2.1