summaryrefslogtreecommitdiff
path: root/scripts/create-pipeline-failure-incident.rb
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/create-pipeline-failure-incident.rb')
-rwxr-xr-xscripts/create-pipeline-failure-incident.rb90
1 files changed, 88 insertions, 2 deletions
diff --git a/scripts/create-pipeline-failure-incident.rb b/scripts/create-pipeline-failure-incident.rb
index 1035a680291..2b86fac680d 100755
--- a/scripts/create-pipeline-failure-incident.rb
+++ b/scripts/create-pipeline-failure-incident.rb
@@ -5,9 +5,10 @@
require 'optparse'
require 'json'
-require_relative 'api/pipeline_failed_jobs'
+require_relative 'api/commit_merge_requests'
require_relative 'api/create_issue'
require_relative 'api/create_issue_discussion'
+require_relative 'api/pipeline_failed_jobs'
class CreatePipelineFailureIncident
DEFAULT_OPTIONS = {
@@ -29,6 +30,8 @@ class CreatePipelineFailureIncident
labels: incident_labels
}
+ payload[:assignee_ids] = assignee_ids if stable_branch_incident?
+
CreateIssue.new(project: project, api_token: api_token).execute(payload).tap do |incident|
CreateIssueDiscussion.new(project: project, api_token: api_token)
.execute(issue_iid: incident.iid, body: "## Root Cause Analysis")
@@ -41,8 +44,18 @@ class CreatePipelineFailureIncident
attr_reader :project, :api_token
+ def stable_branch_incident?
+ ENV['CI_COMMIT_REF_NAME'] =~ /^[\d-]+-stable(-ee)?$/
+ end
+
def failed_jobs
- @failed_jobs ||= PipelineFailedJobs.new(API::DEFAULT_OPTIONS.dup.merge(exclude_allowed_to_fail_jobs: true)).execute
+ @failed_jobs ||= PipelineFailedJobs.new(API::DEFAULT_OPTIONS.merge(exclude_allowed_to_fail_jobs: true)).execute
+ end
+
+ def merge_request
+ @merge_request ||= CommitMergeRequests.new(
+ API::DEFAULT_OPTIONS.merge(sha: ENV['CI_COMMIT_SHA'])
+ ).execute.first
end
def now
@@ -63,6 +76,12 @@ class CreatePipelineFailureIncident
end
def description
+ return broken_stable_description_content if stable_branch_incident?
+
+ broken_master_description_content
+ end
+
+ def broken_master_description_content
<<~MARKDOWN
## #{project_link} pipeline #{pipeline_link} failed
@@ -107,7 +126,62 @@ class CreatePipelineFailureIncident
MARKDOWN
end
+ def broken_stable_description_content
+ <<~MARKDOWN
+ ## #{project_link} pipeline #{pipeline_link} failed
+
+ **Branch: #{branch_link}**
+
+ **Commit: #{commit_link}**
+
+ **Merge Request: #{merge_request_link}**
+
+ **Triggered by** #{triggered_by_link} • **Source:** #{source} • **Duration:** #{pipeline_duration} minutes
+
+ **Failed jobs (#{failed_jobs.size}):**
+
+ #{failed_jobs_list}
+
+ ### General guidelines
+
+ A broken stable branch prevents patch releases from being built.
+ Fixing the pipeline is a priority to prevent any delays in releases.
+
+ The process in the [Broken `master` handbook guide](https://about.gitlab.com/handbook/engineering/workflow/#broken-master) can be referenced since much of that process also applies here.
+
+ ### Investigation
+
+ **Be sure to fill the `Timeline` for this incident.**
+
+ 1. If the failure is new, and looks like a potential flaky failure, you can retry the failing job.
+ Make sure to mention the retry in the `Timeline` and leave a link to the retried job.
+ 1. Search for similar master-broken issues in https://gitlab.com/gitlab-org/quality/engineering-productivity/master-broken-incidents/-/issues
+ 1. If one exists, ask the DRI of the master-broken issue to cherry-pick any resulting merge requests into the stable branch
+
+ @gitlab-org/release/managers if the merge request author or maintainer is not available, this can be escalated using the dev-on-call process in the [#dev-escalation slack channel](https://gitlab.slack.com/archives/CLKLMSUR4).
+
+ ### Pre-resolution
+
+ If you believe that there's an easy resolution by either:
+
+ - Reverting a particular merge request.
+ - Making a quick fix (for example, one line or a few similar simple changes in a few lines).
+ You can create a merge request, assign to any available maintainer, and ping people that were involved/related to the introduction of the failure.
+ Additionally, a message can be posted in `#backend_maintainers` or `#frontend_maintainers` to get a maintainer take a look at the fix ASAP.
+ - Cherry picking a change that was used to fix a similar master-broken issue.
+
+ In both cases, make sure to add the ~"pipeline:expedite" label to speed up the `stable`-fixing pipelines.
+
+ ### Resolution
+
+ Add a comment to this issue describing how this incident could have been prevented earlier in the Merge Request pipeline (rather than the merge commit pipeline).
+
+ MARKDOWN
+ end
+
def incident_labels
+ return ['release-blocker'] if stable_branch_incident?
+
master_broken_label =
if ENV['CI_PROJECT_NAME'] == 'gitlab-foss'
'master:foss-broken'
@@ -118,6 +192,12 @@ class CreatePipelineFailureIncident
DEFAULT_LABELS.dup << master_broken_label
end
+ def assignee_ids
+ ids = [ENV['GITLAB_USER_ID'].to_i]
+ ids << merge_request['author']['id'].to_i if merge_request
+ ids
+ end
+
def pipeline_link
"[##{ENV['CI_PIPELINE_ID']}](#{ENV['CI_PIPELINE_URL']})"
end
@@ -134,6 +214,12 @@ class CreatePipelineFailureIncident
"[#{ENV['CI_COMMIT_TITLE']}](#{ENV['CI_PROJECT_URL']}/-/commit/#{ENV['CI_COMMIT_SHA']})"
end
+ def merge_request_link
+ return 'N/A' unless merge_request
+
+ "[#{merge_request['title']}](#{merge_request['web_url']})"
+ end
+
def source
"`#{ENV['CI_PIPELINE_SOURCE']}`"
end