summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitlab-ci.yml5
-rw-r--r--.gitlab/ci/frontend.gitlab-ci.yml8
-rw-r--r--.gitlab/ci/global.gitlab-ci.yml12
-rw-r--r--.gitlab/ci/reports.gitlab-ci.yml27
-rw-r--r--.gitlab/ci/review.gitlab-ci.yml8
-rw-r--r--app/models/concerns/prometheus_adapter.rb2
-rw-r--r--app/models/project_services/chat_notification_service.rb5
-rw-r--r--app/models/project_services/prometheus_service.rb8
-rw-r--r--app/services/prometheus/create_default_alerts_service.rb88
-rw-r--r--app/workers/all_queues.yml7
-rw-r--r--app/workers/prometheus/create_default_alerts_worker.rb27
-rw-r--r--changelogs/unreleased/118788-automatic-metric-alerts.yml5
-rw-r--r--changelogs/unreleased/212073-slack-notifications-stop-working-after-updating-gitlab.yml5
-rw-r--r--config/sidekiq_queues.yml2
-rw-r--r--spec/models/concerns/prometheus_adapter_spec.rb23
-rw-r--r--spec/models/project_services/chat_notification_service_spec.rb24
-rw-r--r--spec/models/project_services/prometheus_service_spec.rb28
-rw-r--r--spec/services/prometheus/create_default_alerts_service_spec.rb74
-rw-r--r--spec/workers/prometheus/create_default_alerts_worker_spec.rb66
19 files changed, 390 insertions, 34 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 4f91bdd27b1..9e808cc7a9b 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -12,7 +12,9 @@ stages:
- post-qa
- pages
-# always use `gitlab-org` runners
+# always use `gitlab-org` runners, however
+# in cases where jobs require Docker-in-Docker, the job
+# definition must be extended with `.use-docker-in-docker`
default:
tags:
- gitlab-org
@@ -49,6 +51,7 @@ variables:
BUILD_ASSETS_IMAGE: "false"
ES_JAVA_OPTS: "-Xms256m -Xmx256m"
ELASTIC_URL: "http://elastic:changeme@elasticsearch:9200"
+ DOCKER_VERSION: "19.03.0"
include:
- local: .gitlab/ci/cache-repo.gitlab-ci.yml
diff --git a/.gitlab/ci/frontend.gitlab-ci.yml b/.gitlab/ci/frontend.gitlab-ci.yml
index d1fe9c6241d..f465099195b 100644
--- a/.gitlab/ci/frontend.gitlab-ci.yml
+++ b/.gitlab/ci/frontend.gitlab-ci.yml
@@ -15,10 +15,9 @@
- .default-retry
- .default-before_script
- .assets-compile-cache
+ - .use-docker-in-docker
image: registry.gitlab.com/gitlab-org/gitlab-build-images:ruby-2.6.5-git-2.26-lfs-2.9-chrome-73.0-node-12.x-yarn-1.21-graphicsmagick-1.3.34-docker-19.03.1
stage: prepare
- services:
- - docker:19.03.0-dind
variables:
NODE_ENV: "production"
RAILS_ENV: "production"
@@ -27,8 +26,6 @@
WEBPACK_REPORT: "true"
# we override the max_old_space_size to prevent OOM errors
NODE_OPTIONS: --max_old_space_size=3584
- DOCKER_DRIVER: overlay2
- DOCKER_HOST: tcp://docker:2375
cache:
key: "assets-compile:production:v1"
artifacts:
@@ -53,9 +50,6 @@
- time scripts/build_assets_image
- scripts/clean-old-cached-assets
- rm -f /etc/apt/sources.list.d/google*.list # We don't need to update Chrome here
- tags:
- - gitlab-org
- - docker
gitlab:assets:compile pull-push-cache:
extends:
diff --git a/.gitlab/ci/global.gitlab-ci.yml b/.gitlab/ci/global.gitlab-ci.yml
index e5467df7374..83a2f7abad0 100644
--- a/.gitlab/ci/global.gitlab-ci.yml
+++ b/.gitlab/ci/global.gitlab-ci.yml
@@ -101,3 +101,15 @@
.as-if-foss:
variables:
FOSS_ONLY: '1'
+
+.use-docker-in-docker:
+ image: docker:${DOCKER_VERSION}
+ services:
+ - docker:${DOCKER_VERSION}-dind
+ variables:
+ DOCKER_DRIVER: overlay2
+ DOCKER_HOST: tcp://docker:2375
+ DOCKER_TLS_CERTDIR: ""
+ tags:
+ # See https://gitlab.com/gitlab-com/www-gitlab-com/-/issues/7019 for tag descriptions
+ - gitlab-org-docker
diff --git a/.gitlab/ci/reports.gitlab-ci.yml b/.gitlab/ci/reports.gitlab-ci.yml
index 77ad938a0ef..b1343afdb5e 100644
--- a/.gitlab/ci/reports.gitlab-ci.yml
+++ b/.gitlab/ci/reports.gitlab-ci.yml
@@ -11,15 +11,14 @@ code_quality:
extends:
- .default-retry
- .reports:rules:code_quality
+ - .use-docker-in-docker
stage: test
needs: []
- image: docker:stable
allow_failure: true
- services:
- - docker:stable-dind
variables:
- DOCKER_DRIVER: overlay2
- DOCKER_TLS_CERTDIR: ""
+ # emptying DOCKER_HOST so it can be detected properly on kubernetes executor
+ # with the script below
+ DOCKER_HOST: ""
CODE_QUALITY_IMAGE: "registry.gitlab.com/gitlab-org/ci-cd/codequality:0.85.9"
script:
- |
@@ -50,6 +49,7 @@ sast:
extends:
- .default-retry
- .reports:rules:sast
+ - .use-docker-in-docker
stage: test
allow_failure: true
needs: []
@@ -59,14 +59,12 @@ sast:
reports:
sast: gl-sast-report.json
expire_in: 1 week # GitLab-specific
- image: docker:stable
variables:
- DOCKER_DRIVER: overlay2
- DOCKER_TLS_CERTDIR: ""
+ # emptying DOCKER_HOST so it can be detected properly on kubernetes executor
+ # with the script below
+ DOCKER_HOST: ""
SAST_BRAKEMAN_LEVEL: 2 # GitLab-specific
SAST_EXCLUDED_PATHS: qa,spec,doc,ee/spec # GitLab-specific
- services:
- - docker:stable-dind
script:
- export SAST_VERSION=${SP_VERSION:-$(echo "$CI_SERVER_VERSION" | sed 's/^\([0-9]*\)\.\([0-9]*\).*/\1-\2-stable/')}
- |
@@ -89,16 +87,15 @@ dependency_scanning:
extends:
- .default-retry
- .reports:rules:dependency_scanning
+ - .use-docker-in-docker
stage: test
needs: []
- image: docker:stable
variables:
- DOCKER_DRIVER: overlay2
- DOCKER_TLS_CERTDIR: ""
+ # emptying DOCKER_HOST so it can be detected properly on kubernetes executor
+ # with the script below
+ DOCKER_HOST: ""
DS_EXCLUDED_PATHS: "qa/qa/ee/fixtures/secure_premade_reports,spec,ee/spec" # GitLab-specific
allow_failure: true
- services:
- - docker:stable-dind
script:
- export DS_VERSION=${SP_VERSION:-$(echo "$CI_SERVER_VERSION" | sed 's/^\([0-9]*\)\.\([0-9]*\).*/\1-\2-stable/')}
- |
diff --git a/.gitlab/ci/review.gitlab-ci.yml b/.gitlab/ci/review.gitlab-ci.yml
index 8148b044eb4..0ca27c52083 100644
--- a/.gitlab/ci/review.gitlab-ci.yml
+++ b/.gitlab/ci/review.gitlab-ci.yml
@@ -1,15 +1,9 @@
.review-docker:
extends:
- .default-retry
+ - .use-docker-in-docker
image: registry.gitlab.com/gitlab-org/gitlab-build-images:gitlab-qa-alpine-ruby-2.6
- services:
- - docker:19.03.0-dind
- tags:
- - gitlab-org
- - docker
variables:
- DOCKER_DRIVER: overlay2
- DOCKER_HOST: tcp://docker:2375
GITLAB_EDITION: "ce"
build-qa-image:
diff --git a/app/models/concerns/prometheus_adapter.rb b/app/models/concerns/prometheus_adapter.rb
index 2c171eecbd5..abc41a1c476 100644
--- a/app/models/concerns/prometheus_adapter.rb
+++ b/app/models/concerns/prometheus_adapter.rb
@@ -5,8 +5,6 @@ module PrometheusAdapter
included do
include ReactiveCaching
- # We can't prepend outside of this model due to the use of `included`, so this must stay here.
- prepend_if_ee('EE::PrometheusAdapter') # rubocop: disable Cop/InjectEnterpriseEditionModule
self.reactive_cache_lease_timeout = 30.seconds
self.reactive_cache_refresh_interval = 30.seconds
diff --git a/app/models/project_services/chat_notification_service.rb b/app/models/project_services/chat_notification_service.rb
index 7bd011101dd..1ec983223f3 100644
--- a/app/models/project_services/chat_notification_service.rb
+++ b/app/models/project_services/chat_notification_service.rb
@@ -84,10 +84,11 @@ class ChatNotificationService < Service
event_type = data[:event_type] || object_kind
- channel_names = get_channel_field(event_type).presence || channel
+ channel_names = get_channel_field(event_type).presence || channel.presence
+ channels = channel_names&.split(',')&.map(&:strip)
opts = {}
- opts[:channel] = channel_names.split(',').map(&:strip) if channel_names
+ opts[:channel] = channels if channels.present?
opts[:username] = username if username
return false unless notify(message, opts)
diff --git a/app/models/project_services/prometheus_service.rb b/app/models/project_services/prometheus_service.rb
index 17bc29ebdcf..30dfcc11417 100644
--- a/app/models/project_services/prometheus_service.rb
+++ b/app/models/project_services/prometheus_service.rb
@@ -24,6 +24,8 @@ class PrometheusService < MonitoringService
after_commit :track_events
+ after_create_commit :create_default_alerts
+
def initialize_properties
if properties.nil?
self.properties = {}
@@ -147,4 +149,10 @@ class PrometheusService < MonitoringService
def disabled_manual_prometheus?
manual_configuration_changed? && !manual_configuration?
end
+
+ def create_default_alerts
+ return unless project_id
+
+ Prometheus::CreateDefaultAlertsWorker.perform_async(project_id: project_id)
+ end
end
diff --git a/app/services/prometheus/create_default_alerts_service.rb b/app/services/prometheus/create_default_alerts_service.rb
new file mode 100644
index 00000000000..3eb5ad7711a
--- /dev/null
+++ b/app/services/prometheus/create_default_alerts_service.rb
@@ -0,0 +1,88 @@
+# frozen_string_literal: true
+
+module Prometheus
+ class CreateDefaultAlertsService < BaseService
+ include Gitlab::Utils::StrongMemoize
+
+ attr_reader :project
+
+ DEFAULT_ALERTS = [
+ {
+ identifier: 'response_metrics_nginx_ingress_16_http_error_rate',
+ operator: 'gt',
+ threshold: 0.1
+ },
+ {
+ identifier: 'response_metrics_nginx_ingress_http_error_rate',
+ operator: 'gt',
+ threshold: 0.1
+ }
+ ].freeze
+
+ def initialize(project:)
+ @project = project
+ end
+
+ def execute
+ return ServiceResponse.error(message: 'Invalid project') unless project
+ return ServiceResponse.error(message: 'Invalid environment') unless environment
+
+ create_alerts
+
+ ServiceResponse.success
+ end
+
+ private
+
+ def create_alerts
+ DEFAULT_ALERTS.each do |alert_hash|
+ identifier = alert_hash[:identifier]
+ next if alerts_by_identifier(environment).key?(identifier)
+
+ metric = metrics_by_identifier[identifier]
+ next unless metric
+
+ create_alert(alert: alert_hash, metric: metric)
+ end
+ end
+
+ def metrics_by_identifier
+ strong_memoize(:metrics_by_identifier) do
+ metric_identifiers = DEFAULT_ALERTS.map { |alert| alert[:identifier] }
+
+ PrometheusMetricsFinder
+ .new(identifier: metric_identifiers, common: true)
+ .execute
+ .index_by(&:identifier)
+ end
+ end
+
+ def alerts_by_identifier(environment)
+ strong_memoize(:alerts_by_identifier) do
+ Projects::Prometheus::AlertsFinder
+ .new(project: project, metric: metrics_by_identifier.values, environment: environment)
+ .execute
+ .index_by { |alert| alert.prometheus_metric.identifier }
+ end
+ end
+
+ def environment
+ strong_memoize(:environment) do
+ EnvironmentsFinder.new(project, nil, name: 'production').find.first ||
+ project.environments.first
+ end
+ end
+
+ def create_alert(alert:, metric:)
+ PrometheusAlert.create!(
+ project: project,
+ prometheus_metric: metric,
+ environment: environment,
+ threshold: alert[:threshold],
+ operator: alert[:operator]
+ )
+ rescue ActiveRecord::RecordNotUnique
+ # Ignore duplicate creations although it unlikely to happen
+ end
+ end
+end
diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml
index 3ee8901b23b..5e3ea162d20 100644
--- a/app/workers/all_queues.yml
+++ b/app/workers/all_queues.yml
@@ -1256,6 +1256,13 @@
:resource_boundary: :unknown
:weight: 1
:idempotent:
+- :name: prometheus_create_default_alerts
+ :feature_category: :incident_management
+ :has_external_dependencies:
+ :urgency: :high
+ :resource_boundary: :unknown
+ :weight: 1
+ :idempotent: true
- :name: propagate_service_template
:feature_category: :source_code_management
:has_external_dependencies:
diff --git a/app/workers/prometheus/create_default_alerts_worker.rb b/app/workers/prometheus/create_default_alerts_worker.rb
new file mode 100644
index 00000000000..2c4fefa9ece
--- /dev/null
+++ b/app/workers/prometheus/create_default_alerts_worker.rb
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+module Prometheus
+ class CreateDefaultAlertsWorker
+ include ApplicationWorker
+
+ feature_category :incident_management
+ urgency :high
+ idempotent!
+
+ def perform(project_id)
+ project = Project.find_by_id(project_id)
+
+ return unless project
+
+ result = Prometheus::CreateDefaultAlertsService.new(project: project).execute
+
+ log_info(result.message) if result.error?
+ end
+
+ private
+
+ def log_info(message)
+ logger.info(structured_payload(message: message))
+ end
+ end
+end
diff --git a/changelogs/unreleased/118788-automatic-metric-alerts.yml b/changelogs/unreleased/118788-automatic-metric-alerts.yml
new file mode 100644
index 00000000000..5931a6c1b9d
--- /dev/null
+++ b/changelogs/unreleased/118788-automatic-metric-alerts.yml
@@ -0,0 +1,5 @@
+---
+title: Add Prometheus alerts automatically after Prometheus Service was created
+merge_request: 28503
+author:
+type: added
diff --git a/changelogs/unreleased/212073-slack-notifications-stop-working-after-updating-gitlab.yml b/changelogs/unreleased/212073-slack-notifications-stop-working-after-updating-gitlab.yml
new file mode 100644
index 00000000000..3814ed9476a
--- /dev/null
+++ b/changelogs/unreleased/212073-slack-notifications-stop-working-after-updating-gitlab.yml
@@ -0,0 +1,5 @@
+---
+title: Fix Slack notifications when upgrading from old GitLab versions
+merge_request: 29111
+author:
+type: fixed
diff --git a/config/sidekiq_queues.yml b/config/sidekiq_queues.yml
index 40e09d6196e..c7d1e745fab 100644
--- a/config/sidekiq_queues.yml
+++ b/config/sidekiq_queues.yml
@@ -200,6 +200,8 @@
- 1
- - project_update_repository_storage
- 1
+- - prometheus_create_default_alerts
+ - 1
- - propagate_service_template
- 1
- - reactive_caching
diff --git a/spec/models/concerns/prometheus_adapter_spec.rb b/spec/models/concerns/prometheus_adapter_spec.rb
index bc58df7e7c2..fdc98ba74b8 100644
--- a/spec/models/concerns/prometheus_adapter_spec.rb
+++ b/spec/models/concerns/prometheus_adapter_spec.rb
@@ -18,6 +18,29 @@ describe PrometheusAdapter, :use_clean_rails_memory_store_caching do
let(:environment_query) { Gitlab::Prometheus::Queries::EnvironmentQuery }
describe '#query' do
+ describe 'validate_query' do
+ let(:environment) { build_stubbed(:environment, slug: 'env-slug') }
+ let(:validation_query) { Gitlab::Prometheus::Queries::ValidateQuery.name }
+ let(:query) { 'avg(response)' }
+ let(:validation_respone) { { data: { valid: true } } }
+
+ around do |example|
+ Timecop.freeze { example.run }
+ end
+
+ context 'with valid data' do
+ subject { service.query(:validate, query) }
+
+ before do
+ stub_reactive_cache(service, validation_respone, validation_query, query)
+ end
+
+ it 'returns query data' do
+ is_expected.to eq(query: { valid: true })
+ end
+ end
+ end
+
describe 'environment' do
let(:environment) { build_stubbed(:environment, slug: 'env-slug') }
diff --git a/spec/models/project_services/chat_notification_service_spec.rb b/spec/models/project_services/chat_notification_service_spec.rb
index 64c7a9b230d..1caec5c6eb7 100644
--- a/spec/models/project_services/chat_notification_service_spec.rb
+++ b/spec/models/project_services/chat_notification_service_spec.rb
@@ -75,6 +75,30 @@ describe ChatNotificationService do
end
end
+ context 'with "channel" property' do
+ before do
+ allow(chat_service).to receive(:channel).and_return(channel)
+ end
+
+ context 'empty string' do
+ let(:channel) { '' }
+
+ it 'does not include the channel' do
+ expect(chat_service).to receive(:notify).with(any_args, hash_excluding(:channel)).and_return(true)
+ expect(chat_service.execute(data)).to be(true)
+ end
+ end
+
+ context 'empty spaces' do
+ let(:channel) { ' ' }
+
+ it 'does not include the channel' do
+ expect(chat_service).to receive(:notify).with(any_args, hash_excluding(:channel)).and_return(true)
+ expect(chat_service.execute(data)).to be(true)
+ end
+ end
+ end
+
shared_examples 'with channel specified' do |channel, expected_channels|
before do
allow(chat_service).to receive(:push_channel).and_return(channel)
diff --git a/spec/models/project_services/prometheus_service_spec.rb b/spec/models/project_services/prometheus_service_spec.rb
index f663f0ab7cb..415d634d405 100644
--- a/spec/models/project_services/prometheus_service_spec.rb
+++ b/spec/models/project_services/prometheus_service_spec.rb
@@ -123,6 +123,34 @@ describe PrometheusService, :use_clean_rails_memory_store_caching do
end
end
+ describe 'callbacks' do
+ context 'after_create' do
+ let(:project) { create(:project) }
+ let(:service) { build(:prometheus_service, project: project) }
+
+ subject(:create_service) { service.save! }
+
+ it 'creates default alerts' do
+ expect(Prometheus::CreateDefaultAlertsWorker)
+ .to receive(:perform_async)
+ .with(project_id: project.id)
+
+ create_service
+ end
+
+ context 'no project exists' do
+ let(:service) { build(:prometheus_service, :instance) }
+
+ it 'does not create default alerts' do
+ expect(Prometheus::CreateDefaultAlertsWorker)
+ .not_to receive(:perform_async)
+
+ create_service
+ end
+ end
+ end
+ end
+
describe '#test' do
before do
service.manual_configuration = true
diff --git a/spec/services/prometheus/create_default_alerts_service_spec.rb b/spec/services/prometheus/create_default_alerts_service_spec.rb
new file mode 100644
index 00000000000..3382844c99a
--- /dev/null
+++ b/spec/services/prometheus/create_default_alerts_service_spec.rb
@@ -0,0 +1,74 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+describe Prometheus::CreateDefaultAlertsService do
+ let_it_be(:project) { create(:project) }
+ let(:instance) { described_class.new(project: project) }
+ let(:expected_alerts) { described_class::DEFAULT_ALERTS }
+
+ describe '#execute' do
+ subject(:execute) { instance.execute }
+
+ shared_examples 'no alerts created' do
+ it 'does not create alerts' do
+ expect { execute }.not_to change { project.reload.prometheus_alerts.count }
+ end
+ end
+
+ context 'no environment' do
+ it_behaves_like 'no alerts created'
+ end
+
+ context 'environment exists' do
+ let_it_be(:environment) { create(:environment, project: project) }
+
+ context 'no found metric' do
+ it_behaves_like 'no alerts created'
+ end
+
+ context 'metric exists' do
+ before do
+ create_expected_metrics!
+ end
+
+ context 'alert exists already' do
+ before do
+ create_pre_existing_alerts!(environment)
+ end
+
+ it_behaves_like 'no alerts created'
+ end
+
+ it 'creates alerts' do
+ expect { execute }.to change { project.reload.prometheus_alerts.count }
+ .by(expected_alerts.size)
+ end
+
+ context 'multiple environments' do
+ let!(:production) { create(:environment, project: project, name: 'production') }
+
+ it 'uses the production environment' do
+ expect { execute }.to change { production.reload.prometheus_alerts.count }
+ .by(expected_alerts.size)
+ end
+ end
+ end
+ end
+ end
+
+ private
+
+ def create_expected_metrics!
+ expected_alerts.each do |alert_hash|
+ create(:prometheus_metric, :common, identifier: alert_hash.fetch(:identifier))
+ end
+ end
+
+ def create_pre_existing_alerts!(environment)
+ expected_alerts.each do |alert_hash|
+ metric = PrometheusMetric.for_identifier(alert_hash[:identifier]).first!
+ create(:prometheus_alert, prometheus_metric: metric, project: project, environment: environment)
+ end
+ end
+end
diff --git a/spec/workers/prometheus/create_default_alerts_worker_spec.rb b/spec/workers/prometheus/create_default_alerts_worker_spec.rb
new file mode 100644
index 00000000000..1b1867d5bb6
--- /dev/null
+++ b/spec/workers/prometheus/create_default_alerts_worker_spec.rb
@@ -0,0 +1,66 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+describe Prometheus::CreateDefaultAlertsWorker do
+ let_it_be(:project) { create(:project) }
+ let(:worker) { described_class.new }
+ let(:logger) { worker.send(:logger) }
+ let(:service) { instance_double(Prometheus::CreateDefaultAlertsService) }
+ let(:service_result) { ServiceResponse.success }
+
+ subject { described_class.new.perform(project.id) }
+
+ before do
+ allow(Prometheus::CreateDefaultAlertsService)
+ .to receive(:new).with(project: project)
+ .and_return(service)
+ allow(service).to receive(:execute)
+ .and_return(service_result)
+ end
+
+ it_behaves_like 'an idempotent worker' do
+ let(:job_args) { [project.id] }
+
+ it 'calls the service' do
+ expect(service).to receive(:execute)
+
+ subject
+ end
+
+ context 'project is nil' do
+ let(:job_args) { [nil] }
+
+ it 'does not call the service' do
+ expect(service).not_to receive(:execute)
+
+ subject
+ end
+ end
+
+ context 'when service returns an error' do
+ let(:error_message) { 'some message' }
+ let(:service_result) { ServiceResponse.error(message: error_message) }
+
+ it 'succeeds and logs the error' do
+ expect(logger)
+ .to receive(:info)
+ .with(a_hash_including('message' => error_message))
+ .exactly(worker_exec_times).times
+
+ subject
+ end
+ end
+ end
+
+ context 'when service raises an exception' do
+ let(:error_message) { 'some exception' }
+ let(:exception) { StandardError.new(error_message) }
+
+ it 're-raises exception' do
+ allow(service).to receive(:execute).and_raise(exception)
+
+ expect { subject }.to raise_error(exception)
+ end
+ end
+end