diff options
author | Douwe Maan <douwe@gitlab.com> | 2017-05-12 17:15:31 +0000 |
---|---|---|
committer | Douwe Maan <douwe@gitlab.com> | 2017-05-12 17:15:31 +0000 |
commit | bec9ec9a6e8980d4354f2b577cfd2a96a83a73b7 (patch) | |
tree | 4a5c6ddb0915de712628e48dfdf7c8cbea7a17fa | |
parent | 4024200314144cd7aa0bda35f38817c8b198142a (diff) | |
parent | f38779c6f521e0d554303db0619bafb07ffeda29 (diff) | |
download | gitlab-ce-bec9ec9a6e8980d4354f2b577cfd2a96a83a73b7.tar.gz |
Merge branch '27439-performance-deltas' into 'master'
Expose memory deltas between app deployments and refactor prometheus queries to support more custom queries
See merge request !10981
-rw-r--r-- | app/controllers/projects/deployments_controller.rb | 6 | ||||
-rw-r--r-- | app/models/deployment.rb | 9 | ||||
-rw-r--r-- | app/models/environment.rb | 2 | ||||
-rw-r--r-- | app/models/project_services/monitoring_service.rb | 7 | ||||
-rw-r--r-- | app/models/project_services/prometheus_service.rb | 36 | ||||
-rw-r--r-- | lib/gitlab/prometheus/queries/base_query.rb | 26 | ||||
-rw-r--r-- | lib/gitlab/prometheus/queries/deployment_query.rb | 26 | ||||
-rw-r--r-- | lib/gitlab/prometheus/queries/environment_query.rb | 20 | ||||
-rw-r--r-- | lib/gitlab/prometheus_client.rb (renamed from lib/gitlab/prometheus.rb) | 4 | ||||
-rw-r--r-- | spec/controllers/projects/deployments_controller_spec.rb | 71 | ||||
-rw-r--r-- | spec/lib/gitlab/prometheus/queries/deployment_query_spec.rb | 36 | ||||
-rw-r--r-- | spec/lib/gitlab/prometheus_client_spec.rb (renamed from spec/lib/gitlab/prometheus_spec.rb) | 2 | ||||
-rw-r--r-- | spec/models/deployment_spec.rb | 9 | ||||
-rw-r--r-- | spec/models/environment_spec.rb | 2 | ||||
-rw-r--r-- | spec/models/project_services/prometheus_service_spec.rb | 32 | ||||
-rw-r--r-- | spec/support/prometheus_helpers.rb | 8 |
16 files changed, 213 insertions, 83 deletions
diff --git a/app/controllers/projects/deployments_controller.rb b/app/controllers/projects/deployments_controller.rb index f06a4d943f3..6644deb49c9 100644 --- a/app/controllers/projects/deployments_controller.rb +++ b/app/controllers/projects/deployments_controller.rb @@ -11,13 +11,15 @@ class Projects::DeploymentsController < Projects::ApplicationController end def metrics - @metrics = deployment.metrics(1.hour) - + return render_404 unless deployment.has_metrics? + @metrics = deployment.metrics if @metrics&.any? render json: @metrics, status: :ok else head :no_content end + rescue NotImplementedError + render_404 end private diff --git a/app/models/deployment.rb b/app/models/deployment.rb index f83d9e8edee..216cec751e3 100644 --- a/app/models/deployment.rb +++ b/app/models/deployment.rb @@ -103,15 +103,10 @@ class Deployment < ActiveRecord::Base project.monitoring_service.present? end - def metrics(timeframe) + def metrics return {} unless has_metrics? - half_timeframe = timeframe / 2 - timeframe_start = created_at - half_timeframe - timeframe_end = created_at + half_timeframe - - metrics = project.monitoring_service.metrics(environment, timeframe_start: timeframe_start, timeframe_end: timeframe_end) - metrics&.merge(deployment_time: created_at.to_i) || {} + project.monitoring_service.deployment_metrics(self) end private diff --git a/app/models/environment.rb b/app/models/environment.rb index 61efc1b2d17..61572d8d69a 100644 --- a/app/models/environment.rb +++ b/app/models/environment.rb @@ -150,7 +150,7 @@ class Environment < ActiveRecord::Base end def metrics - project.monitoring_service.metrics(self) if has_metrics? + project.monitoring_service.environment_metrics(self) if has_metrics? end # An environment name is not necessarily suitable for use in URLs, DNS diff --git a/app/models/project_services/monitoring_service.rb b/app/models/project_services/monitoring_service.rb index 59776552540..ee9cd78327a 100644 --- a/app/models/project_services/monitoring_service.rb +++ b/app/models/project_services/monitoring_service.rb @@ -9,8 +9,11 @@ class MonitoringService < Service %w() end - # Environments have a number of metrics - def metrics(environment, timeframe_start: nil, timeframe_end: nil) + def environment_metrics(environment) + raise NotImplementedError + end + + def deployment_metrics(deployment) raise NotImplementedError end end diff --git a/app/models/project_services/prometheus_service.rb b/app/models/project_services/prometheus_service.rb index 6a4479c4dbc..ec72cb6856d 100644 --- a/app/models/project_services/prometheus_service.rb +++ b/app/models/project_services/prometheus_service.rb @@ -63,45 +63,31 @@ class PrometheusService < MonitoringService { success: false, result: err } end - def metrics(environment, timeframe_start: nil, timeframe_end: nil) - with_reactive_cache(environment.slug, timeframe_start, timeframe_end) do |data| - data - end + def environment_metrics(environment) + with_reactive_cache(Gitlab::Prometheus::Queries::EnvironmentQuery.name, environment.id, &:itself) + end + + def deployment_metrics(deployment) + metrics = with_reactive_cache(Gitlab::Prometheus::Queries::DeploymentQuery.name, deployment.id, &:itself) + metrics&.merge(deployment_time: created_at.to_i) || {} end # Cache metrics for specific environment - def calculate_reactive_cache(environment_slug, timeframe_start, timeframe_end) + def calculate_reactive_cache(query_class_name, *args) return unless active? && project && !project.pending_delete? - timeframe_start = Time.parse(timeframe_start) if timeframe_start - timeframe_end = Time.parse(timeframe_end) if timeframe_end - - timeframe_start ||= 8.hours.ago - timeframe_end ||= Time.now - - memory_query = %{(sum(container_memory_usage_bytes{container_name!="POD",environment="#{environment_slug}"}) / count(container_memory_usage_bytes{container_name!="POD",environment="#{environment_slug}"})) /1024/1024} - cpu_query = %{sum(rate(container_cpu_usage_seconds_total{container_name!="POD",environment="#{environment_slug}"}[2m])) / count(container_cpu_usage_seconds_total{container_name!="POD",environment="#{environment_slug}"}) * 100} + metrics = Kernel.const_get(query_class_name).new(client).query(*args) { success: true, - metrics: { - # Average Memory used in MB - memory_values: client.query_range(memory_query, start: timeframe_start, stop: timeframe_end), - memory_current: client.query(memory_query, time: timeframe_end), - memory_previous: client.query(memory_query, time: timeframe_start), - # Average CPU Utilization - cpu_values: client.query_range(cpu_query, start: timeframe_start, stop: timeframe_end), - cpu_current: client.query(cpu_query, time: timeframe_end), - cpu_previous: client.query(cpu_query, time: timeframe_start) - }, + metrics: metrics, last_update: Time.now.utc } - rescue Gitlab::PrometheusError => err { success: false, result: err.message } end def client - @prometheus ||= Gitlab::Prometheus.new(api_url: api_url) + @prometheus ||= Gitlab::PrometheusClient.new(api_url: api_url) end end diff --git a/lib/gitlab/prometheus/queries/base_query.rb b/lib/gitlab/prometheus/queries/base_query.rb new file mode 100644 index 00000000000..2a2eb4ae57f --- /dev/null +++ b/lib/gitlab/prometheus/queries/base_query.rb @@ -0,0 +1,26 @@ +module Gitlab + module Prometheus + module Queries + class BaseQuery + attr_accessor :client + delegate :query_range, :query, to: :client, prefix: true + + def raw_memory_usage_query(environment_slug) + %{avg(container_memory_usage_bytes{container_name!="POD",environment="#{environment_slug}"}) / 2^20} + end + + def raw_cpu_usage_query(environment_slug) + %{avg(rate(container_cpu_usage_seconds_total{container_name!="POD",environment="#{environment_slug}"}[2m])) * 100} + end + + def initialize(client) + @client = client + end + + def query(*args) + raise NotImplementedError + end + end + end + end +end diff --git a/lib/gitlab/prometheus/queries/deployment_query.rb b/lib/gitlab/prometheus/queries/deployment_query.rb new file mode 100644 index 00000000000..2cc08731f8d --- /dev/null +++ b/lib/gitlab/prometheus/queries/deployment_query.rb @@ -0,0 +1,26 @@ +module Gitlab::Prometheus::Queries + class DeploymentQuery < BaseQuery + def query(deployment_id) + deployment = Deployment.find_by(id: deployment_id) + environment_slug = deployment.environment.slug + + memory_query = raw_memory_usage_query(environment_slug) + memory_avg_query = %{avg(avg_over_time(container_memory_usage_bytes{container_name!="POD",environment="#{environment_slug}"}[30m]))} + cpu_query = raw_cpu_usage_query(environment_slug) + cpu_avg_query = %{avg(rate(container_cpu_usage_seconds_total{container_name!="POD",environment="#{environment_slug}"}[30m])) * 100} + + timeframe_start = (deployment.created_at - 30.minutes).to_f + timeframe_end = (deployment.created_at + 30.minutes).to_f + + { + memory_values: client_query_range(memory_query, start: timeframe_start, stop: timeframe_end), + memory_before: client_query(memory_avg_query, time: deployment.created_at.to_f), + memory_after: client_query(memory_avg_query, time: timeframe_end), + + cpu_values: client_query_range(cpu_query, start: timeframe_start, stop: timeframe_end), + cpu_before: client_query(cpu_avg_query, time: deployment.created_at.to_f), + cpu_after: client_query(cpu_avg_query, time: timeframe_end) + } + end + end +end diff --git a/lib/gitlab/prometheus/queries/environment_query.rb b/lib/gitlab/prometheus/queries/environment_query.rb new file mode 100644 index 00000000000..01d756d7284 --- /dev/null +++ b/lib/gitlab/prometheus/queries/environment_query.rb @@ -0,0 +1,20 @@ +module Gitlab::Prometheus::Queries + class EnvironmentQuery < BaseQuery + def query(environment_id) + environment = Environment.find_by(id: environment_id) + environment_slug = environment.slug + timeframe_start = 8.hours.ago.to_f + timeframe_end = Time.now.to_f + + memory_query = raw_memory_usage_query(environment_slug) + cpu_query = raw_cpu_usage_query(environment_slug) + + { + memory_values: client_query_range(memory_query, start: timeframe_start, stop: timeframe_end), + memory_current: client_query(memory_query, time: timeframe_end), + cpu_values: client_query_range(cpu_query, start: timeframe_start, stop: timeframe_end), + cpu_current: client_query(cpu_query, time: timeframe_end) + } + end + end +end diff --git a/lib/gitlab/prometheus.rb b/lib/gitlab/prometheus_client.rb index 37125980b1c..5b51a1779dd 100644 --- a/lib/gitlab/prometheus.rb +++ b/lib/gitlab/prometheus_client.rb @@ -2,7 +2,7 @@ module Gitlab PrometheusError = Class.new(StandardError) # Helper methods to interact with Prometheus network services & resources - class Prometheus + class PrometheusClient attr_reader :api_url def initialize(api_url:) @@ -15,7 +15,7 @@ module Gitlab def query(query, time: Time.now) get_result('vector') do - json_api_get('query', query: query, time: time.utc.to_f) + json_api_get('query', query: query, time: time.to_f) end end diff --git a/spec/controllers/projects/deployments_controller_spec.rb b/spec/controllers/projects/deployments_controller_spec.rb index 3de38bb4dac..4c69443314d 100644 --- a/spec/controllers/projects/deployments_controller_spec.rb +++ b/spec/controllers/projects/deployments_controller_spec.rb @@ -42,39 +42,68 @@ describe Projects::DeploymentsController do before do allow(controller).to receive(:deployment).and_return(deployment) end - - context 'when environment has no metrics' do + context 'when metrics are disabled' do before do - expect(deployment).to receive(:metrics).and_return(nil) + allow(deployment).to receive(:has_metrics?).and_return false end - it 'returns a empty response 204 resposne' do + it 'responds with not found' do get :metrics, deployment_params(id: deployment.id) - expect(response).to have_http_status(204) - expect(response.body).to eq('') + + expect(response).to be_not_found end end - context 'when environment has some metrics' do - let(:empty_metrics) do - { - success: true, - metrics: {}, - last_update: 42 - } + context 'when metrics are enabled' do + before do + allow(deployment).to receive(:has_metrics?).and_return true end - before do - expect(deployment).to receive(:metrics).and_return(empty_metrics) + context 'when environment has no metrics' do + before do + expect(deployment).to receive(:metrics).and_return(nil) + end + + it 'returns a empty response 204 resposne' do + get :metrics, deployment_params(id: deployment.id) + expect(response).to have_http_status(204) + expect(response.body).to eq('') + end end - it 'returns a metrics JSON document' do - get :metrics, deployment_params(id: deployment.id) + context 'when environment has some metrics' do + let(:empty_metrics) do + { + success: true, + metrics: {}, + last_update: 42 + } + end + + before do + expect(deployment).to receive(:metrics).and_return(empty_metrics) + end + + it 'returns a metrics JSON document' do + get :metrics, deployment_params(id: deployment.id) + + expect(response).to be_ok + expect(json_response['success']).to be(true) + expect(json_response['metrics']).to eq({}) + expect(json_response['last_update']).to eq(42) + end + end + + context 'when metrics service does not implement deployment metrics' do + before do + allow(deployment).to receive(:metrics).and_raise(NotImplementedError) + end + + it 'responds with not found' do + get :metrics, deployment_params(id: deployment.id) - expect(response).to be_ok - expect(json_response['success']).to be(true) - expect(json_response['metrics']).to eq({}) - expect(json_response['last_update']).to eq(42) + expect(response).to be_not_found + end end end end diff --git a/spec/lib/gitlab/prometheus/queries/deployment_query_spec.rb b/spec/lib/gitlab/prometheus/queries/deployment_query_spec.rb new file mode 100644 index 00000000000..cc7d7e57f06 --- /dev/null +++ b/spec/lib/gitlab/prometheus/queries/deployment_query_spec.rb @@ -0,0 +1,36 @@ +require 'spec_helper' + +describe Gitlab::Prometheus::Queries::DeploymentQuery, lib: true do + let(:environment) { create(:environment, slug: 'environment-slug') } + let(:deployment) { create(:deployment, environment: environment) } + + let(:client) { double('prometheus_client') } + subject { described_class.new(client) } + + around do |example| + Timecop.freeze { example.run } + end + + it 'sends appropriate queries to prometheus' do + start_time_matcher = be_within(0.5).of((deployment.created_at - 30.minutes).to_f) + stop_time_matcher = be_within(0.5).of((deployment.created_at + 30.minutes).to_f) + created_at_matcher = be_within(0.5).of(deployment.created_at.to_f) + + expect(client).to receive(:query_range).with('avg(container_memory_usage_bytes{container_name!="POD",environment="environment-slug"}) / 2^20', + start: start_time_matcher, stop: stop_time_matcher) + expect(client).to receive(:query).with('avg(avg_over_time(container_memory_usage_bytes{container_name!="POD",environment="environment-slug"}[30m]))', + time: created_at_matcher) + expect(client).to receive(:query).with('avg(avg_over_time(container_memory_usage_bytes{container_name!="POD",environment="environment-slug"}[30m]))', + time: stop_time_matcher) + + expect(client).to receive(:query_range).with('avg(rate(container_cpu_usage_seconds_total{container_name!="POD",environment="environment-slug"}[2m])) * 100', + start: start_time_matcher, stop: stop_time_matcher) + expect(client).to receive(:query).with('avg(rate(container_cpu_usage_seconds_total{container_name!="POD",environment="environment-slug"}[30m])) * 100', + time: created_at_matcher) + expect(client).to receive(:query).with('avg(rate(container_cpu_usage_seconds_total{container_name!="POD",environment="environment-slug"}[30m])) * 100', + time: stop_time_matcher) + + expect(subject.query(deployment.id)).to eq(memory_values: nil, memory_before: nil, memory_after: nil, + cpu_values: nil, cpu_before: nil, cpu_after: nil) + end +end diff --git a/spec/lib/gitlab/prometheus_spec.rb b/spec/lib/gitlab/prometheus_client_spec.rb index 9d67e3d2f37..2d8bd2f6b97 100644 --- a/spec/lib/gitlab/prometheus_spec.rb +++ b/spec/lib/gitlab/prometheus_client_spec.rb @@ -1,6 +1,6 @@ require 'spec_helper' -describe Gitlab::Prometheus, lib: true do +describe Gitlab::PrometheusClient, lib: true do include PrometheusHelpers subject { described_class.new(api_url: 'https://prometheus.example.com') } diff --git a/spec/models/deployment_spec.rb b/spec/models/deployment_spec.rb index 212fcd884a8..4bda7d4314a 100644 --- a/spec/models/deployment_spec.rb +++ b/spec/models/deployment_spec.rb @@ -52,7 +52,7 @@ describe Deployment, models: true do describe '#metrics' do let(:deployment) { create(:deployment) } - subject { deployment.metrics(1.hour) } + subject { deployment.metrics } context 'metrics are disabled' do it { is_expected.to eq({}) } @@ -63,16 +63,17 @@ describe Deployment, models: true do { success: true, metrics: {}, - last_update: 42 + last_update: 42, + deployment_time: 1494408956 } end before do - allow(deployment.project).to receive_message_chain(:monitoring_service, :metrics) + allow(deployment.project).to receive_message_chain(:monitoring_service, :deployment_metrics) .with(any_args).and_return(simple_metrics) end - it { is_expected.to eq(simple_metrics.merge(deployment_time: deployment.created_at.utc.to_i)) } + it { is_expected.to eq(simple_metrics) } end end diff --git a/spec/models/environment_spec.rb b/spec/models/environment_spec.rb index edc1c204014..12519de8636 100644 --- a/spec/models/environment_spec.rb +++ b/spec/models/environment_spec.rb @@ -393,7 +393,7 @@ describe Environment, models: true do it 'returns the metrics from the deployment service' do expect(project.monitoring_service) - .to receive(:metrics).with(environment) + .to receive(:environment_metrics).with(environment) .and_return(:fake_metrics) is_expected.to eq(:fake_metrics) diff --git a/spec/models/project_services/prometheus_service_spec.rb b/spec/models/project_services/prometheus_service_spec.rb index 82a3e2698c1..1f9d3c07b51 100644 --- a/spec/models/project_services/prometheus_service_spec.rb +++ b/spec/models/project_services/prometheus_service_spec.rb @@ -6,6 +6,7 @@ describe PrometheusService, models: true, caching: true do let(:project) { create(:prometheus_project) } let(:service) { project.prometheus_service } + let(:environment_query) { Gitlab::Prometheus::Queries::EnvironmentQuery } describe "Associations" do it { is_expected.to belong_to :project } @@ -45,49 +46,56 @@ describe PrometheusService, models: true, caching: true do end end - describe '#metrics' do + describe '#environment_metrics' do let(:environment) { build_stubbed(:environment, slug: 'env-slug') } around do |example| Timecop.freeze { example.run } end - context 'with valid data without time range' do - subject { service.metrics(environment) } + context 'with valid data' do + subject { service.environment_metrics(environment) } before do - stub_reactive_cache(service, prometheus_data, 'env-slug', nil, nil) + stub_reactive_cache(service, prometheus_data, environment_query, environment.id) end it 'returns reactive data' do is_expected.to eq(prometheus_data) end end + end + + describe '#deployment_metrics' do + let(:deployment) { build_stubbed(:deployment)} + let(:deployment_query) { Gitlab::Prometheus::Queries::DeploymentQuery } + + around do |example| + Timecop.freeze { example.run } + end - context 'with valid data with time range' do - let(:t_start) { 1.hour.ago.utc } - let(:t_end) { Time.now.utc } - subject { service.metrics(environment, timeframe_start: t_start, timeframe_end: t_end) } + context 'with valid data' do + subject { service.deployment_metrics(deployment) } before do - stub_reactive_cache(service, prometheus_data, 'env-slug', t_start, t_end) + stub_reactive_cache(service, prometheus_data, deployment_query, deployment.id) end it 'returns reactive data' do - is_expected.to eq(prometheus_data) + is_expected.to eq(prometheus_data.merge(deployment_time: deployment.created_at.to_i)) end end end describe '#calculate_reactive_cache' do - let(:environment) { build_stubbed(:environment, slug: 'env-slug') } + let(:environment) { create(:environment, slug: 'env-slug') } around do |example| Timecop.freeze { example.run } end subject do - service.calculate_reactive_cache(environment.slug, nil, nil) + service.calculate_reactive_cache(environment_query.to_s, environment.id) end context 'when service is inactive' do diff --git a/spec/support/prometheus_helpers.rb b/spec/support/prometheus_helpers.rb index 51987c7767d..6b9ebcf2bb3 100644 --- a/spec/support/prometheus_helpers.rb +++ b/spec/support/prometheus_helpers.rb @@ -1,10 +1,10 @@ module PrometheusHelpers def prometheus_memory_query(environment_slug) - %{(sum(container_memory_usage_bytes{container_name!="POD",environment="#{environment_slug}"}) / count(container_memory_usage_bytes{container_name!="POD",environment="#{environment_slug}"})) /1024/1024} + %{avg(container_memory_usage_bytes{container_name!="POD",environment="#{environment_slug}"}) / 2^20} end def prometheus_cpu_query(environment_slug) - %{sum(rate(container_cpu_usage_seconds_total{container_name!="POD",environment="#{environment_slug}"}[2m])) / count(container_cpu_usage_seconds_total{container_name!="POD",environment="#{environment_slug}"}) * 100} + %{avg(rate(container_cpu_usage_seconds_total{container_name!="POD",environment="#{environment_slug}"}[2m])) * 100} end def prometheus_ping_url(prometheus_query) @@ -88,10 +88,8 @@ module PrometheusHelpers metrics: { memory_values: prometheus_values_body('matrix').dig(:data, :result), memory_current: prometheus_value_body('vector').dig(:data, :result), - memory_previous: prometheus_value_body('vector').dig(:data, :result), cpu_values: prometheus_values_body('matrix').dig(:data, :result), - cpu_current: prometheus_value_body('vector').dig(:data, :result), - cpu_previous: prometheus_value_body('vector').dig(:data, :result) + cpu_current: prometheus_value_body('vector').dig(:data, :result) }, last_update: last_update } |