diff options
19 files changed, 390 insertions, 34 deletions
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4f91bdd27b1..9e808cc7a9b 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -12,7 +12,9 @@ stages: - post-qa - pages -# always use `gitlab-org` runners +# always use `gitlab-org` runners, however +# in cases where jobs require Docker-in-Docker, the job +# definition must be extended with `.use-docker-in-docker` default: tags: - gitlab-org @@ -49,6 +51,7 @@ variables: BUILD_ASSETS_IMAGE: "false" ES_JAVA_OPTS: "-Xms256m -Xmx256m" ELASTIC_URL: "http://elastic:changeme@elasticsearch:9200" + DOCKER_VERSION: "19.03.0" include: - local: .gitlab/ci/cache-repo.gitlab-ci.yml diff --git a/.gitlab/ci/frontend.gitlab-ci.yml b/.gitlab/ci/frontend.gitlab-ci.yml index d1fe9c6241d..f465099195b 100644 --- a/.gitlab/ci/frontend.gitlab-ci.yml +++ b/.gitlab/ci/frontend.gitlab-ci.yml @@ -15,10 +15,9 @@ - .default-retry - .default-before_script - .assets-compile-cache + - .use-docker-in-docker image: registry.gitlab.com/gitlab-org/gitlab-build-images:ruby-2.6.5-git-2.26-lfs-2.9-chrome-73.0-node-12.x-yarn-1.21-graphicsmagick-1.3.34-docker-19.03.1 stage: prepare - services: - - docker:19.03.0-dind variables: NODE_ENV: "production" RAILS_ENV: "production" @@ -27,8 +26,6 @@ WEBPACK_REPORT: "true" # we override the max_old_space_size to prevent OOM errors NODE_OPTIONS: --max_old_space_size=3584 - DOCKER_DRIVER: overlay2 - DOCKER_HOST: tcp://docker:2375 cache: key: "assets-compile:production:v1" artifacts: @@ -53,9 +50,6 @@ - time scripts/build_assets_image - scripts/clean-old-cached-assets - rm -f /etc/apt/sources.list.d/google*.list # We don't need to update Chrome here - tags: - - gitlab-org - - docker gitlab:assets:compile pull-push-cache: extends: diff --git a/.gitlab/ci/global.gitlab-ci.yml b/.gitlab/ci/global.gitlab-ci.yml index e5467df7374..83a2f7abad0 100644 --- a/.gitlab/ci/global.gitlab-ci.yml +++ b/.gitlab/ci/global.gitlab-ci.yml @@ -101,3 +101,15 @@ .as-if-foss: variables: FOSS_ONLY: '1' + +.use-docker-in-docker: + image: docker:${DOCKER_VERSION} + services: + - docker:${DOCKER_VERSION}-dind + variables: + DOCKER_DRIVER: overlay2 + DOCKER_HOST: tcp://docker:2375 + DOCKER_TLS_CERTDIR: "" + tags: + # See https://gitlab.com/gitlab-com/www-gitlab-com/-/issues/7019 for tag descriptions + - gitlab-org-docker diff --git a/.gitlab/ci/reports.gitlab-ci.yml b/.gitlab/ci/reports.gitlab-ci.yml index 77ad938a0ef..b1343afdb5e 100644 --- a/.gitlab/ci/reports.gitlab-ci.yml +++ b/.gitlab/ci/reports.gitlab-ci.yml @@ -11,15 +11,14 @@ code_quality: extends: - .default-retry - .reports:rules:code_quality + - .use-docker-in-docker stage: test needs: [] - image: docker:stable allow_failure: true - services: - - docker:stable-dind variables: - DOCKER_DRIVER: overlay2 - DOCKER_TLS_CERTDIR: "" + # emptying DOCKER_HOST so it can be detected properly on kubernetes executor + # with the script below + DOCKER_HOST: "" CODE_QUALITY_IMAGE: "registry.gitlab.com/gitlab-org/ci-cd/codequality:0.85.9" script: - | @@ -50,6 +49,7 @@ sast: extends: - .default-retry - .reports:rules:sast + - .use-docker-in-docker stage: test allow_failure: true needs: [] @@ -59,14 +59,12 @@ sast: reports: sast: gl-sast-report.json expire_in: 1 week # GitLab-specific - image: docker:stable variables: - DOCKER_DRIVER: overlay2 - DOCKER_TLS_CERTDIR: "" + # emptying DOCKER_HOST so it can be detected properly on kubernetes executor + # with the script below + DOCKER_HOST: "" SAST_BRAKEMAN_LEVEL: 2 # GitLab-specific SAST_EXCLUDED_PATHS: qa,spec,doc,ee/spec # GitLab-specific - services: - - docker:stable-dind script: - export SAST_VERSION=${SP_VERSION:-$(echo "$CI_SERVER_VERSION" | sed 's/^\([0-9]*\)\.\([0-9]*\).*/\1-\2-stable/')} - | @@ -89,16 +87,15 @@ dependency_scanning: extends: - .default-retry - .reports:rules:dependency_scanning + - .use-docker-in-docker stage: test needs: [] - image: docker:stable variables: - DOCKER_DRIVER: overlay2 - DOCKER_TLS_CERTDIR: "" + # emptying DOCKER_HOST so it can be detected properly on kubernetes executor + # with the script below + DOCKER_HOST: "" DS_EXCLUDED_PATHS: "qa/qa/ee/fixtures/secure_premade_reports,spec,ee/spec" # GitLab-specific allow_failure: true - services: - - docker:stable-dind script: - export DS_VERSION=${SP_VERSION:-$(echo "$CI_SERVER_VERSION" | sed 's/^\([0-9]*\)\.\([0-9]*\).*/\1-\2-stable/')} - | diff --git a/.gitlab/ci/review.gitlab-ci.yml b/.gitlab/ci/review.gitlab-ci.yml index 8148b044eb4..0ca27c52083 100644 --- a/.gitlab/ci/review.gitlab-ci.yml +++ b/.gitlab/ci/review.gitlab-ci.yml @@ -1,15 +1,9 @@ .review-docker: extends: - .default-retry + - .use-docker-in-docker image: registry.gitlab.com/gitlab-org/gitlab-build-images:gitlab-qa-alpine-ruby-2.6 - services: - - docker:19.03.0-dind - tags: - - gitlab-org - - docker variables: - DOCKER_DRIVER: overlay2 - DOCKER_HOST: tcp://docker:2375 GITLAB_EDITION: "ce" build-qa-image: diff --git a/app/models/concerns/prometheus_adapter.rb b/app/models/concerns/prometheus_adapter.rb index 2c171eecbd5..abc41a1c476 100644 --- a/app/models/concerns/prometheus_adapter.rb +++ b/app/models/concerns/prometheus_adapter.rb @@ -5,8 +5,6 @@ module PrometheusAdapter included do include ReactiveCaching - # We can't prepend outside of this model due to the use of `included`, so this must stay here. - prepend_if_ee('EE::PrometheusAdapter') # rubocop: disable Cop/InjectEnterpriseEditionModule self.reactive_cache_lease_timeout = 30.seconds self.reactive_cache_refresh_interval = 30.seconds diff --git a/app/models/project_services/chat_notification_service.rb b/app/models/project_services/chat_notification_service.rb index 7bd011101dd..1ec983223f3 100644 --- a/app/models/project_services/chat_notification_service.rb +++ b/app/models/project_services/chat_notification_service.rb @@ -84,10 +84,11 @@ class ChatNotificationService < Service event_type = data[:event_type] || object_kind - channel_names = get_channel_field(event_type).presence || channel + channel_names = get_channel_field(event_type).presence || channel.presence + channels = channel_names&.split(',')&.map(&:strip) opts = {} - opts[:channel] = channel_names.split(',').map(&:strip) if channel_names + opts[:channel] = channels if channels.present? opts[:username] = username if username return false unless notify(message, opts) diff --git a/app/models/project_services/prometheus_service.rb b/app/models/project_services/prometheus_service.rb index 17bc29ebdcf..30dfcc11417 100644 --- a/app/models/project_services/prometheus_service.rb +++ b/app/models/project_services/prometheus_service.rb @@ -24,6 +24,8 @@ class PrometheusService < MonitoringService after_commit :track_events + after_create_commit :create_default_alerts + def initialize_properties if properties.nil? self.properties = {} @@ -147,4 +149,10 @@ class PrometheusService < MonitoringService def disabled_manual_prometheus? manual_configuration_changed? && !manual_configuration? end + + def create_default_alerts + return unless project_id + + Prometheus::CreateDefaultAlertsWorker.perform_async(project_id: project_id) + end end diff --git a/app/services/prometheus/create_default_alerts_service.rb b/app/services/prometheus/create_default_alerts_service.rb new file mode 100644 index 00000000000..3eb5ad7711a --- /dev/null +++ b/app/services/prometheus/create_default_alerts_service.rb @@ -0,0 +1,88 @@ +# frozen_string_literal: true + +module Prometheus + class CreateDefaultAlertsService < BaseService + include Gitlab::Utils::StrongMemoize + + attr_reader :project + + DEFAULT_ALERTS = [ + { + identifier: 'response_metrics_nginx_ingress_16_http_error_rate', + operator: 'gt', + threshold: 0.1 + }, + { + identifier: 'response_metrics_nginx_ingress_http_error_rate', + operator: 'gt', + threshold: 0.1 + } + ].freeze + + def initialize(project:) + @project = project + end + + def execute + return ServiceResponse.error(message: 'Invalid project') unless project + return ServiceResponse.error(message: 'Invalid environment') unless environment + + create_alerts + + ServiceResponse.success + end + + private + + def create_alerts + DEFAULT_ALERTS.each do |alert_hash| + identifier = alert_hash[:identifier] + next if alerts_by_identifier(environment).key?(identifier) + + metric = metrics_by_identifier[identifier] + next unless metric + + create_alert(alert: alert_hash, metric: metric) + end + end + + def metrics_by_identifier + strong_memoize(:metrics_by_identifier) do + metric_identifiers = DEFAULT_ALERTS.map { |alert| alert[:identifier] } + + PrometheusMetricsFinder + .new(identifier: metric_identifiers, common: true) + .execute + .index_by(&:identifier) + end + end + + def alerts_by_identifier(environment) + strong_memoize(:alerts_by_identifier) do + Projects::Prometheus::AlertsFinder + .new(project: project, metric: metrics_by_identifier.values, environment: environment) + .execute + .index_by { |alert| alert.prometheus_metric.identifier } + end + end + + def environment + strong_memoize(:environment) do + EnvironmentsFinder.new(project, nil, name: 'production').find.first || + project.environments.first + end + end + + def create_alert(alert:, metric:) + PrometheusAlert.create!( + project: project, + prometheus_metric: metric, + environment: environment, + threshold: alert[:threshold], + operator: alert[:operator] + ) + rescue ActiveRecord::RecordNotUnique + # Ignore duplicate creations although it unlikely to happen + end + end +end diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml index 3ee8901b23b..5e3ea162d20 100644 --- a/app/workers/all_queues.yml +++ b/app/workers/all_queues.yml @@ -1256,6 +1256,13 @@ :resource_boundary: :unknown :weight: 1 :idempotent: +- :name: prometheus_create_default_alerts + :feature_category: :incident_management + :has_external_dependencies: + :urgency: :high + :resource_boundary: :unknown + :weight: 1 + :idempotent: true - :name: propagate_service_template :feature_category: :source_code_management :has_external_dependencies: diff --git a/app/workers/prometheus/create_default_alerts_worker.rb b/app/workers/prometheus/create_default_alerts_worker.rb new file mode 100644 index 00000000000..2c4fefa9ece --- /dev/null +++ b/app/workers/prometheus/create_default_alerts_worker.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +module Prometheus + class CreateDefaultAlertsWorker + include ApplicationWorker + + feature_category :incident_management + urgency :high + idempotent! + + def perform(project_id) + project = Project.find_by_id(project_id) + + return unless project + + result = Prometheus::CreateDefaultAlertsService.new(project: project).execute + + log_info(result.message) if result.error? + end + + private + + def log_info(message) + logger.info(structured_payload(message: message)) + end + end +end diff --git a/changelogs/unreleased/118788-automatic-metric-alerts.yml b/changelogs/unreleased/118788-automatic-metric-alerts.yml new file mode 100644 index 00000000000..5931a6c1b9d --- /dev/null +++ b/changelogs/unreleased/118788-automatic-metric-alerts.yml @@ -0,0 +1,5 @@ +--- +title: Add Prometheus alerts automatically after Prometheus Service was created +merge_request: 28503 +author: +type: added diff --git a/changelogs/unreleased/212073-slack-notifications-stop-working-after-updating-gitlab.yml b/changelogs/unreleased/212073-slack-notifications-stop-working-after-updating-gitlab.yml new file mode 100644 index 00000000000..3814ed9476a --- /dev/null +++ b/changelogs/unreleased/212073-slack-notifications-stop-working-after-updating-gitlab.yml @@ -0,0 +1,5 @@ +--- +title: Fix Slack notifications when upgrading from old GitLab versions +merge_request: 29111 +author: +type: fixed diff --git a/config/sidekiq_queues.yml b/config/sidekiq_queues.yml index 40e09d6196e..c7d1e745fab 100644 --- a/config/sidekiq_queues.yml +++ b/config/sidekiq_queues.yml @@ -200,6 +200,8 @@ - 1 - - project_update_repository_storage - 1 +- - prometheus_create_default_alerts + - 1 - - propagate_service_template - 1 - - reactive_caching diff --git a/spec/models/concerns/prometheus_adapter_spec.rb b/spec/models/concerns/prometheus_adapter_spec.rb index bc58df7e7c2..fdc98ba74b8 100644 --- a/spec/models/concerns/prometheus_adapter_spec.rb +++ b/spec/models/concerns/prometheus_adapter_spec.rb @@ -18,6 +18,29 @@ describe PrometheusAdapter, :use_clean_rails_memory_store_caching do let(:environment_query) { Gitlab::Prometheus::Queries::EnvironmentQuery } describe '#query' do + describe 'validate_query' do + let(:environment) { build_stubbed(:environment, slug: 'env-slug') } + let(:validation_query) { Gitlab::Prometheus::Queries::ValidateQuery.name } + let(:query) { 'avg(response)' } + let(:validation_respone) { { data: { valid: true } } } + + around do |example| + Timecop.freeze { example.run } + end + + context 'with valid data' do + subject { service.query(:validate, query) } + + before do + stub_reactive_cache(service, validation_respone, validation_query, query) + end + + it 'returns query data' do + is_expected.to eq(query: { valid: true }) + end + end + end + describe 'environment' do let(:environment) { build_stubbed(:environment, slug: 'env-slug') } diff --git a/spec/models/project_services/chat_notification_service_spec.rb b/spec/models/project_services/chat_notification_service_spec.rb index 64c7a9b230d..1caec5c6eb7 100644 --- a/spec/models/project_services/chat_notification_service_spec.rb +++ b/spec/models/project_services/chat_notification_service_spec.rb @@ -75,6 +75,30 @@ describe ChatNotificationService do end end + context 'with "channel" property' do + before do + allow(chat_service).to receive(:channel).and_return(channel) + end + + context 'empty string' do + let(:channel) { '' } + + it 'does not include the channel' do + expect(chat_service).to receive(:notify).with(any_args, hash_excluding(:channel)).and_return(true) + expect(chat_service.execute(data)).to be(true) + end + end + + context 'empty spaces' do + let(:channel) { ' ' } + + it 'does not include the channel' do + expect(chat_service).to receive(:notify).with(any_args, hash_excluding(:channel)).and_return(true) + expect(chat_service.execute(data)).to be(true) + end + end + end + shared_examples 'with channel specified' do |channel, expected_channels| before do allow(chat_service).to receive(:push_channel).and_return(channel) diff --git a/spec/models/project_services/prometheus_service_spec.rb b/spec/models/project_services/prometheus_service_spec.rb index f663f0ab7cb..415d634d405 100644 --- a/spec/models/project_services/prometheus_service_spec.rb +++ b/spec/models/project_services/prometheus_service_spec.rb @@ -123,6 +123,34 @@ describe PrometheusService, :use_clean_rails_memory_store_caching do end end + describe 'callbacks' do + context 'after_create' do + let(:project) { create(:project) } + let(:service) { build(:prometheus_service, project: project) } + + subject(:create_service) { service.save! } + + it 'creates default alerts' do + expect(Prometheus::CreateDefaultAlertsWorker) + .to receive(:perform_async) + .with(project_id: project.id) + + create_service + end + + context 'no project exists' do + let(:service) { build(:prometheus_service, :instance) } + + it 'does not create default alerts' do + expect(Prometheus::CreateDefaultAlertsWorker) + .not_to receive(:perform_async) + + create_service + end + end + end + end + describe '#test' do before do service.manual_configuration = true diff --git a/spec/services/prometheus/create_default_alerts_service_spec.rb b/spec/services/prometheus/create_default_alerts_service_spec.rb new file mode 100644 index 00000000000..3382844c99a --- /dev/null +++ b/spec/services/prometheus/create_default_alerts_service_spec.rb @@ -0,0 +1,74 @@ +# frozen_string_literal: true + +require 'spec_helper' + +describe Prometheus::CreateDefaultAlertsService do + let_it_be(:project) { create(:project) } + let(:instance) { described_class.new(project: project) } + let(:expected_alerts) { described_class::DEFAULT_ALERTS } + + describe '#execute' do + subject(:execute) { instance.execute } + + shared_examples 'no alerts created' do + it 'does not create alerts' do + expect { execute }.not_to change { project.reload.prometheus_alerts.count } + end + end + + context 'no environment' do + it_behaves_like 'no alerts created' + end + + context 'environment exists' do + let_it_be(:environment) { create(:environment, project: project) } + + context 'no found metric' do + it_behaves_like 'no alerts created' + end + + context 'metric exists' do + before do + create_expected_metrics! + end + + context 'alert exists already' do + before do + create_pre_existing_alerts!(environment) + end + + it_behaves_like 'no alerts created' + end + + it 'creates alerts' do + expect { execute }.to change { project.reload.prometheus_alerts.count } + .by(expected_alerts.size) + end + + context 'multiple environments' do + let!(:production) { create(:environment, project: project, name: 'production') } + + it 'uses the production environment' do + expect { execute }.to change { production.reload.prometheus_alerts.count } + .by(expected_alerts.size) + end + end + end + end + end + + private + + def create_expected_metrics! + expected_alerts.each do |alert_hash| + create(:prometheus_metric, :common, identifier: alert_hash.fetch(:identifier)) + end + end + + def create_pre_existing_alerts!(environment) + expected_alerts.each do |alert_hash| + metric = PrometheusMetric.for_identifier(alert_hash[:identifier]).first! + create(:prometheus_alert, prometheus_metric: metric, project: project, environment: environment) + end + end +end diff --git a/spec/workers/prometheus/create_default_alerts_worker_spec.rb b/spec/workers/prometheus/create_default_alerts_worker_spec.rb new file mode 100644 index 00000000000..1b1867d5bb6 --- /dev/null +++ b/spec/workers/prometheus/create_default_alerts_worker_spec.rb @@ -0,0 +1,66 @@ +# frozen_string_literal: true + +require 'spec_helper' + +describe Prometheus::CreateDefaultAlertsWorker do + let_it_be(:project) { create(:project) } + let(:worker) { described_class.new } + let(:logger) { worker.send(:logger) } + let(:service) { instance_double(Prometheus::CreateDefaultAlertsService) } + let(:service_result) { ServiceResponse.success } + + subject { described_class.new.perform(project.id) } + + before do + allow(Prometheus::CreateDefaultAlertsService) + .to receive(:new).with(project: project) + .and_return(service) + allow(service).to receive(:execute) + .and_return(service_result) + end + + it_behaves_like 'an idempotent worker' do + let(:job_args) { [project.id] } + + it 'calls the service' do + expect(service).to receive(:execute) + + subject + end + + context 'project is nil' do + let(:job_args) { [nil] } + + it 'does not call the service' do + expect(service).not_to receive(:execute) + + subject + end + end + + context 'when service returns an error' do + let(:error_message) { 'some message' } + let(:service_result) { ServiceResponse.error(message: error_message) } + + it 'succeeds and logs the error' do + expect(logger) + .to receive(:info) + .with(a_hash_including('message' => error_message)) + .exactly(worker_exec_times).times + + subject + end + end + end + + context 'when service raises an exception' do + let(:error_message) { 'some exception' } + let(:exception) { StandardError.new(error_message) } + + it 're-raises exception' do + allow(service).to receive(:execute).and_raise(exception) + + expect { subject }.to raise_error(exception) + end + end +end |