summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLin Jen-Shin <godfat@godfat.org>2019-06-25 14:00:47 +0000
committerLin Jen-Shin <godfat@godfat.org>2019-06-25 14:00:47 +0000
commit51011d1b2ce9958534bdd61e3a17d9e8d0f58823 (patch)
tree4c3a16b8bf70d1c47cd2d7169e08c932690d13b8
parent546355f734f74c040d0ef0917ade50751fd90731 (diff)
parente266911019f66e1e9b1082f72d2663441167b810 (diff)
downloadgitlab-ce-51011d1b2ce9958534bdd61e3a17d9e8d0f58823.tar.gz
Merge branch 'improve-review-apps-cleanup-when-previous-deployment-failed' into 'master'
Improve Review Apps cleanup when previous deployment failed by only issuing an `helm delete` command Closes #63639 and #62161 See merge request gitlab-org/gitlab-ce!28661
-rw-r--r--.gitlab/ci/review.gitlab-ci.yml17
-rwxr-xr-xscripts/review_apps/review-apps.sh102
2 files changed, 67 insertions, 52 deletions
diff --git a/.gitlab/ci/review.gitlab-ci.yml b/.gitlab/ci/review.gitlab-ci.yml
index 9b764028be9..933af90c85a 100644
--- a/.gitlab/ci/review.gitlab-ci.yml
+++ b/.gitlab/ci/review.gitlab-ci.yml
@@ -77,6 +77,7 @@ schedule:review-build-cng:
.review-deploy-base: &review-deploy-base
<<: *review-base
allow_failure: true
+ retry: 2
stage: review
variables:
HOST_SUFFIX: "${CI_ENVIRONMENT_SLUG}"
@@ -95,10 +96,16 @@ schedule:review-build-cng:
- install_api_client_dependencies_with_apk
- source scripts/review_apps/review-apps.sh
script:
- - perform_review_app_deployment
+ - check_kube_domain
+ - ensure_namespace
+ - install_tiller
+ - install_external_dns
+ - download_chart
+ - deploy || display_deployment_debug
+ - wait_for_review_app_to_be_accessible
+ - add_license
artifacts:
- paths:
- - review_app_url.txt
+ paths: [review_app_url.txt]
expire_in: 2 days
when: always
@@ -108,8 +115,6 @@ review-deploy:
schedule:review-deploy:
<<: *review-deploy-base
<<: *review-schedules-only
- script:
- - perform_review_app_deployment
review-stop:
<<: *review-base
@@ -124,11 +129,11 @@ review-stop:
script:
- source scripts/review_apps/review-apps.sh
- delete
- - cleanup
.review-qa-base: &review-qa-base
<<: *review-docker
allow_failure: true
+ retry: 2
stage: qa
variables:
<<: *review-docker-variables
diff --git a/scripts/review_apps/review-apps.sh b/scripts/review_apps/review-apps.sh
index 3bae2e08a6f..633ea28e96c 100755
--- a/scripts/review_apps/review-apps.sh
+++ b/scripts/review_apps/review-apps.sh
@@ -1,7 +1,7 @@
[[ "$TRACE" ]] && set -x
export TILLER_NAMESPACE="$KUBE_NAMESPACE"
-function deployExists() {
+function deploy_exists() {
local namespace="${1}"
local deploy="${2}"
echoinfo "Checking if ${deploy} exists in the ${namespace} namespace..." true
@@ -13,8 +13,7 @@ function deployExists() {
return $deploy_exists
}
-function previousDeployFailed() {
- set +e
+function previous_deploy_failed() {
local deploy="${1}"
echoinfo "Checking for previous deployment of ${deploy}" true
@@ -34,7 +33,6 @@ function previousDeployFailed() {
else
echoerr "Previous deployment NOT found."
fi
- set -e
return $status
}
@@ -51,49 +49,35 @@ function delete() {
helm delete --purge "$name"
}
-function cleanup() {
- if [ -z "$CI_ENVIRONMENT_SLUG" ]; then
- echoerr "No release given, aborting the delete!"
- return
- fi
-
- echoinfo "Cleaning up '$CI_ENVIRONMENT_SLUG'..." true
-
- kubectl -n "$KUBE_NAMESPACE" delete \
- ingress,svc,pdb,hpa,deploy,statefulset,job,pod,secret,configmap,pvc,secret,clusterrole,clusterrolebinding,role,rolebinding,sa \
- --now --ignore-not-found --include-uninitialized \
- -l release="$CI_ENVIRONMENT_SLUG"
-}
-
function get_pod() {
local app_name="${1}"
local status="${2-Running}"
get_pod_cmd="kubectl get pods -n ${KUBE_NAMESPACE} --field-selector=status.phase=${status} -lapp=${app_name},release=${CI_ENVIRONMENT_SLUG} --no-headers -o=custom-columns=NAME:.metadata.name"
- echoinfo "Running '${get_pod_cmd}'" true
+ echoinfo "Waiting till '${app_name}' pod is ready" true
+ echoinfo "Running '${get_pod_cmd}'"
+ local interval=5
+ local elapsed_seconds=0
+ local max_seconds=$((2 * 60))
while true; do
local pod_name
pod_name="$(eval "${get_pod_cmd}")"
[[ "${pod_name}" == "" ]] || break
- echoinfo "Waiting till '${app_name}' pod is ready";
- sleep 5;
+ if [[ "${elapsed_seconds}" -gt "${max_seconds}" ]]; then
+ echoerr "The pod name couldn't be found after ${elapsed_seconds} seconds, aborting."
+ break
+ fi
+
+ printf "."
+ let "elapsed_seconds+=interval"
+ sleep ${interval}
done
echoinfo "The pod name is '${pod_name}'."
echo "${pod_name}"
}
-function perform_review_app_deployment() {
- check_kube_domain
- ensure_namespace
- install_tiller
- install_external_dns
- time deploy
- wait_for_review_app_to_be_accessible
- add_license
-}
-
function check_kube_domain() {
echoinfo "Checking that Kube domain exists..." true
@@ -119,9 +103,16 @@ function install_tiller() {
echoinfo "Initiating the Helm client..."
helm init --client-only
+ # Set toleration for Tiller to be installed on a specific node pool
helm init \
+ --wait \
--upgrade \
- --replicas 2
+ --node-selectors "app=helm" \
+ --replicas 3 \
+ --override "spec.template.spec.tolerations[0].key"="dedicated" \
+ --override "spec.template.spec.tolerations[0].operator"="Equal" \
+ --override "spec.template.spec.tolerations[0].value"="helm" \
+ --override "spec.template.spec.tolerations[0].effect"="NoSchedule"
kubectl rollout status -n "$TILLER_NAMESPACE" -w "deployment/tiller-deploy"
@@ -137,7 +128,7 @@ function install_external_dns() {
domain=$(echo "${REVIEW_APPS_DOMAIN}" | awk -F. '{printf "%s.%s", $(NF-1), $NF}')
echoinfo "Installing external DNS for domain ${domain}..." true
- if ! deployExists "${KUBE_NAMESPACE}" "${release_name}" || previousDeployFailed "${release_name}" ; then
+ if ! deploy_exists "${KUBE_NAMESPACE}" "${release_name}" || previous_deploy_failed "${release_name}" ; then
echoinfo "Installing external-dns Helm chart"
helm repo update
helm install stable/external-dns \
@@ -156,7 +147,7 @@ function install_external_dns() {
fi
}
-function create_secret() {
+function create_application_secret() {
echoinfo "Creating the ${CI_ENVIRONMENT_SLUG}-gitlab-initial-root-password secret in the ${KUBE_NAMESPACE} namespace..." true
kubectl create secret generic -n "$KUBE_NAMESPACE" \
@@ -165,7 +156,7 @@ function create_secret() {
--dry-run -o json | kubectl apply -f -
}
-function download_gitlab_chart() {
+function download_chart() {
echoinfo "Downloading the GitLab chart..." true
curl -o gitlab.tar.bz2 "https://gitlab.com/charts/gitlab/-/archive/${GITLAB_HELM_CHART_REF}/gitlab-${GITLAB_HELM_CHART_REF}.tar.bz2"
@@ -194,14 +185,12 @@ function deploy() {
gitlab_workhorse_image_repository="${IMAGE_REPOSITORY}/gitlab-workhorse-${IMAGE_VERSION}"
# Cleanup and previous installs, as FAILED and PENDING_UPGRADE will cause errors with `upgrade`
- if [ "$CI_ENVIRONMENT_SLUG" != "production" ] && previousDeployFailed "$CI_ENVIRONMENT_SLUG" ; then
+ if [ "$CI_ENVIRONMENT_SLUG" != "production" ] && previous_deploy_failed "$CI_ENVIRONMENT_SLUG" ; then
echo "Deployment in bad state, cleaning up $CI_ENVIRONMENT_SLUG"
delete
- cleanup
fi
- create_secret
- download_gitlab_chart
+ create_application_secret
HELM_CMD=$(cat << EOF
helm upgrade --install \
@@ -216,7 +205,7 @@ HELM_CMD=$(cat << EOF
--set prometheus.install=false \
--set global.ingress.configureCertmanager=false \
--set global.ingress.tls.secretName=tls-cert \
- --set global.ingress.annotations."external-dns\.alpha\.kubernetes\.io/ttl"="10"
+ --set global.ingress.annotations."external-dns\.alpha\.kubernetes\.io/ttl"="10" \
--set nginx-ingress.controller.service.enableHttp=false \
--set nginx-ingress.defaultBackend.resources.requests.memory=7Mi \
--set nginx-ingress.controller.resources.requests.memory=440M \
@@ -252,14 +241,35 @@ EOF
echoinfo "Deploying with:"
echoinfo "${HELM_CMD}"
- eval $HELM_CMD || true
+ eval "${HELM_CMD}"
+}
+
+function display_deployment_debug() {
+ migrations_pod=$(get_pod "migrations");
+ if [ -z "${migrations_pod}" ]; then
+ echoerr "Migrations pod not found."
+ else
+ echoinfo "Logs tail of the ${migrations_pod} pod..."
+
+ kubectl logs -n "$KUBE_NAMESPACE" "${migrations_pod}" | sed "s/${REVIEW_APPS_ROOT_PASSWORD}/[REDACTED]/g"
+ fi
+
+ unicorn_pod=$(get_pod "unicorn");
+ if [ -z "${unicorn_pod}" ]; then
+ echoerr "Unicorn pod not found."
+ else
+ echoinfo "Logs tail of the ${unicorn_pod} pod..."
+
+ kubectl logs -n "$KUBE_NAMESPACE" -c unicorn "${unicorn_pod}" | sed "s/${REVIEW_APPS_ROOT_PASSWORD}/[REDACTED]/g"
+ fi
}
function wait_for_review_app_to_be_accessible() {
- # In case the Review App isn't completely available yet. Keep trying for 5 minutes.
+ echoinfo "Waiting for the Review App at ${CI_ENVIRONMENT_URL} to be accessible..." true
+
local interval=5
local elapsed_seconds=0
- local max_seconds=$((5 * 60))
+ local max_seconds=$((2 * 60))
while true; do
local review_app_http_code
review_app_http_code=$(curl --silent --output /dev/null --max-time 5 --write-out "%{http_code}" "${CI_ENVIRONMENT_URL}/users/sign_in")
@@ -272,10 +282,10 @@ function wait_for_review_app_to_be_accessible() {
sleep ${interval}
done
- if [[ "${review_app_http_code}" == "200" ]]; then
- echoinfo "The Review App at ${CI_ENVIRONMENT_URL} is ready!"
+ if [[ "${review_app_http_code}" -eq "200" ]]; then
+ echoinfo "The Review App at ${CI_ENVIRONMENT_URL} is ready after ${elapsed_seconds} seconds!"
else
- echoerr "The Review App at ${CI_ENVIRONMENT_URL} isn't ready after 5 minutes of polling..."
+ echoerr "The Review App at ${CI_ENVIRONMENT_URL} isn't ready after ${max_seconds} seconds of polling..."
exit 1
fi
}