diff options
author | Kamil Trzciński <ayufan@ayufan.eu> | 2018-08-31 18:53:50 +0200 |
---|---|---|
committer | Kamil Trzciński <ayufan@ayufan.eu> | 2018-09-04 12:19:22 +0200 |
commit | 0a9d771bcba036971ebc076112c4a62f2179e372 (patch) | |
tree | 5445b484556e0946931e002147cf40a2d6c1afa0 | |
parent | 05ee94beb70a2969b85563a0c41bf5afe48a3699 (diff) | |
download | gitlab-ce-0a9d771bcba036971ebc076112c4a62f2179e372.tar.gz |
Import common metrics into database.
This MR backports PrometheusMetric model to CE
and adds: common, identifier to figure out what kind of metric is used.
22 files changed, 665 insertions, 22 deletions
diff --git a/app/models/project.rb b/app/models/project.rb index 67593c9b2fe..435ecc59bc4 100644 --- a/app/models/project.rb +++ b/app/models/project.rb @@ -232,6 +232,8 @@ class Project < ActiveRecord::Base has_many :clusters, through: :cluster_project, class_name: 'Clusters::Cluster' has_many :cluster_ingresses, through: :clusters, source: :application_ingress, class_name: 'Clusters::Applications::Ingress' + has_many :prometheus_metrics + # Container repositories need to remove data from the container registry, # which is not managed by the DB. Hence we're still using dependent: :destroy # here. diff --git a/app/models/prometheus_metric.rb b/app/models/prometheus_metric.rb new file mode 100644 index 00000000000..87ea0762856 --- /dev/null +++ b/app/models/prometheus_metric.rb @@ -0,0 +1,96 @@ +class PrometheusMetric < ActiveRecord::Base + belongs_to :project, validate: true, inverse_of: :prometheus_metrics + + enum group: { + # built-in groups + nginx_ingress: -1, + ha_proxy: -2, + aws_elb: -3, + nginx: -4, + kubernetes: -5, + + # custom/user groups + business: 0, + response: 1, + system: 2 + } + + validates :title, presence: true + validates :query, presence: true + validates :group, presence: true + validates :y_label, presence: true + validates :unit, presence: true + + validate :require_project + + scope :common, -> { where(common: true) } + + GROUP_TITLES = { + # built-in groups + nginx_ingress: _('Response metrics (NGINX Ingress)'), + ha_proxy: _('Response metrics (HA Proxy)'), + aws_elb: _('Response metrics (AWS ELB)'), + nginx: _('Response metrics (NGINX)'), + kubernetes: _('System metrics (Kubernetes)'), + + # custom/user groups + business: _('Business metrics (Custom)'), + response: _('Response metrics (Custom)'), + system: _('System metrics (Custom)') + }.freeze + + REQUIRED_METRICS = { + nginx_ingress: %w(nginx_upstream_responses_total nginx_upstream_response_msecs_avg), + ha_proxy: %w(haproxy_frontend_http_requests_total haproxy_frontend_http_responses_total), + aws_elb: %w(aws_elb_request_count_sum aws_elb_latency_average aws_elb_httpcode_backend_5_xx_sum), + nginx: %w(nginx_server_requests nginx_server_requestMsec), + kubernetes: %w(container_memory_usage_bytes container_cpu_usage_seconds_total) + }.freeze + + def group_title + GROUP_TITLES[group.to_sym] + end + + def required_metrics + (REQUIRED_METRICS[group.to_sym] || []).map(&:to_s) + end + + def to_query_metric + Gitlab::Prometheus::Metric.new(id: id, title: title, required_metrics: required_metrics, weight: 0, y_label: y_label, queries: queries) + end + + def queries + [ + { + query_range: query, + unit: unit, + label: legend, + series: query_series + } + ] + end + + def query_series + case legend + when 'Status Code' + { + label: 'status_code', + when: [ + { value: '2xx', color: 'green' }, + { value: '4xx', color: 'orange' }, + { value: '5xx', color: 'red' } + ] + } + end + end + + private + + def require_project + if project + errors.add(:project, "cannot be set if this is common metric") if common? + else + errors.add(:project, "has to be set when this is project-specific metric") unless common? + end + end +end diff --git a/changelogs/unreleased/alerts-for-built-in-metrics.yml b/changelogs/unreleased/alerts-for-built-in-metrics.yml new file mode 100644 index 00000000000..f88077eaca0 --- /dev/null +++ b/changelogs/unreleased/alerts-for-built-in-metrics.yml @@ -0,0 +1,5 @@ +--- +title: Import all common metrics into database +merge_request: +author: +type: changed diff --git a/config/prometheus/additional_metrics.yml b/config/prometheus/common_metrics.yml index c994bad7865..61ade829cfa 100644 --- a/config/prometheus/additional_metrics.yml +++ b/config/prometheus/common_metrics.yml @@ -7,7 +7,8 @@ - nginx_upstream_responses_total weight: 1 queries: - - query_range: 'sum(rate(nginx_upstream_responses_total{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) by (status_code)' + - id: response_metrics_nginx_ingress_throughput_status_code + query_range: 'sum(rate(nginx_upstream_responses_total{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) by (status_code)' unit: req / sec label: Status Code series: @@ -25,7 +26,8 @@ - nginx_upstream_response_msecs_avg weight: 1 queries: - - query_range: 'avg(nginx_upstream_response_msecs_avg{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"})' + - id: response_metrics_nginx_ingress_latency_pod_average + query_range: 'avg(nginx_upstream_response_msecs_avg{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"})' label: Pod average unit: ms - title: "HTTP Error Rate" @@ -34,7 +36,8 @@ - nginx_upstream_responses_total weight: 1 queries: - - query_range: 'sum(rate(nginx_upstream_responses_total{status_code="5xx", upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) / sum(rate(nginx_upstream_responses_total{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) * 100' + - id: response_metrics_nginx_ingress_http_error_rate + query_range: 'sum(rate(nginx_upstream_responses_total{status_code="5xx", upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) / sum(rate(nginx_upstream_responses_total{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) * 100' label: 5xx Errors unit: "%" - group: Response metrics (HA Proxy) @@ -46,10 +49,12 @@ - haproxy_frontend_http_requests_total weight: 1 queries: - - query_range: 'sum(rate(haproxy_frontend_http_requests_total{%{environment_filter}}[2m])) by (code)' + - id: response_metrics_ha_proxy_throughput_status_code + query_range: 'sum(rate(haproxy_frontend_http_requests_total{%{environment_filter}}[2m])) by (code)' unit: req / sec + label: Status Code series: - - label: code + - label: status_code when: - value: 2xx color: green @@ -63,7 +68,8 @@ - haproxy_frontend_http_responses_total weight: 1 queries: - - query_range: 'sum(rate(haproxy_frontend_http_responses_total{code="5xx",%{environment_filter}}[2m])) / sum(rate(haproxy_frontend_http_responses_total{%{environment_filter}}[2m]))' + - id: response_metrics_ha_proxy_http_error_rate + query_range: 'sum(rate(haproxy_frontend_http_responses_total{code="5xx",%{environment_filter}}[2m])) / sum(rate(haproxy_frontend_http_responses_total{%{environment_filter}}[2m]))' label: HTTP Errors unit: "%" - group: Response metrics (AWS ELB) @@ -75,7 +81,8 @@ - aws_elb_request_count_sum weight: 1 queries: - - query_range: 'sum(aws_elb_request_count_sum{%{environment_filter}}) / 60' + - id: response_metrics_aws_elb_throughput_requests + query_range: 'sum(aws_elb_request_count_sum{%{environment_filter}}) / 60' label: Total unit: req / sec - title: "Latency" @@ -84,7 +91,8 @@ - aws_elb_latency_average weight: 1 queries: - - query_range: 'avg(aws_elb_latency_average{%{environment_filter}}) * 1000' + - id: response_metrics_aws_elb_latency_average + query_range: 'avg(aws_elb_latency_average{%{environment_filter}}) * 1000' label: Average unit: ms - title: "HTTP Error Rate" @@ -94,7 +102,8 @@ - aws_elb_httpcode_backend_5_xx_sum weight: 1 queries: - - query_range: 'sum(aws_elb_httpcode_backend_5_xx_sum{%{environment_filter}}) / sum(aws_elb_request_count_sum{%{environment_filter}})' + - id: response_metrics_aws_elb_http_error_rate + query_range: 'sum(aws_elb_httpcode_backend_5_xx_sum{%{environment_filter}}) / sum(aws_elb_request_count_sum{%{environment_filter}})' label: HTTP Errors unit: "%" - group: Response metrics (NGINX) @@ -106,7 +115,8 @@ - nginx_server_requests weight: 1 queries: - - query_range: 'sum(rate(nginx_server_requests{server_zone!="*", server_zone!="_", %{environment_filter}}[2m])) by (code)' + - id: response_metrics_nginx_throughput_status_code + query_range: 'sum(rate(nginx_server_requests{server_zone!="*", server_zone!="_", %{environment_filter}}[2m])) by (code)' unit: req / sec label: Status Code series: @@ -124,7 +134,8 @@ - nginx_server_requestMsec weight: 1 queries: - - query_range: 'avg(nginx_server_requestMsec{%{environment_filter}})' + - id: response_metrics_nginx_latency + query_range: 'avg(nginx_server_requestMsec{%{environment_filter}})' label: Upstream unit: ms - title: "HTTP Error Rate" @@ -133,7 +144,8 @@ - nginx_server_requests weight: 1 queries: - - query_range: 'sum(rate(nginx_server_requests{code="5xx", %{environment_filter}}[2m]))' + - id: response_metrics_nginx_http_error_rate + query_range: 'sum(rate(nginx_server_requests{code="5xx", %{environment_filter}}[2m]))' label: HTTP Errors unit: "errors / sec" - group: System metrics (Kubernetes) @@ -145,7 +157,8 @@ - container_memory_usage_bytes weight: 4 queries: - - query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}) by (job)) without (job) /1024/1024/1024' + - id: system_metrics_kubernetes_container_memory_total + query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}) by (job)) without (job) /1024/1024/1024' label: Total unit: GB - title: "Core Usage (Total)" @@ -154,7 +167,8 @@ - container_cpu_usage_seconds_total weight: 3 queries: - - query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}[15m])) by (job)) without (job)' + - id: system_metrics_kubernetes_container_cores_total + query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}[15m])) by (job)) without (job)' label: Total unit: "cores" - title: "Memory Usage (Pod average)" @@ -163,7 +177,8 @@ - container_memory_usage_bytes weight: 2 queries: - - query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}) without (job)) /1024/1024' + - id: system_metrics_kubernetes_container_memory_average + query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}) without (job)) /1024/1024' label: Pod average unit: MB - title: "Core Usage (Pod average)" @@ -172,6 +187,12 @@ - container_cpu_usage_seconds_total weight: 1 queries: - - query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}[15m])) by (pod_name))' + - id: system_metrics_kubernetes_container_core_usage + query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-(.*)",namespace="%{kube_namespace}"}[15m])) by (pod_name))' label: Pod average - unit: "cores"
\ No newline at end of file + unit: "cores" + - id: system_metrics_kubernetes_container_core_usage_canary + query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-canary-(.*)",namespace="%{kube_namespace}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^%{ci_environment_slug}-canary-(.*)",namespace="%{kube_namespace}"}[15m])) by (pod_name))' + label: Pod average + unit: "cores" + track: canary diff --git a/db/fixtures/development/99_common_metrics.rb b/db/fixtures/development/99_common_metrics.rb new file mode 100644 index 00000000000..89424905bd4 --- /dev/null +++ b/db/fixtures/development/99_common_metrics.rb @@ -0,0 +1,3 @@ +require_relative '../importers/common_metrics_importer.rb' + +::Importers::CommonMetricsImporter.new.execute diff --git a/db/fixtures/production/999_common_metrics.rb b/db/fixtures/production/999_common_metrics.rb new file mode 100644 index 00000000000..89424905bd4 --- /dev/null +++ b/db/fixtures/production/999_common_metrics.rb @@ -0,0 +1,3 @@ +require_relative '../importers/common_metrics_importer.rb' + +::Importers::CommonMetricsImporter.new.execute diff --git a/db/importers/common_metrics_importer.rb b/db/importers/common_metrics_importer.rb new file mode 100644 index 00000000000..9e6d2dd0518 --- /dev/null +++ b/db/importers/common_metrics_importer.rb @@ -0,0 +1,101 @@ +# frozen_string_literal: true + +module Importers + class PrometheusMetric < ActiveRecord::Base + enum group: { + # built-in groups + nginx_ingress: -1, + ha_proxy: -2, + aws_elb: -3, + nginx: -4, + kubernetes: -5, + + # custom groups + business: 0, + response: 1, + system: 2, + } + + scope :common, -> { where(common: true) } + + GROUP_TITLES = { + business: _('Business metrics (Custom)'), + response: _('Response metrics (Custom)'), + system: _('System metrics (Custom)'), + nginx_ingress: _('Response metrics (NGINX Ingress)'), + ha_proxy: _('Response metrics (HA Proxy)'), + aws_elb: _('Response metrics (AWS ELB)'), + nginx: _('Response metrics (NGINX)'), + kubernetes: _('System metrics (Kubernetes)') + }.freeze + end + + class CommonMetricsImporter + MissingQueryId = Class.new(StandardError) + + attr_reader :content + + def initialize(file = 'config/prometheus/common_metrics.yml') + @content = YAML.load_file(file) + end + + def execute + process_content do |id, attributes| + find_or_build_metric!(id) + .update!(**attributes) + end + end + + private + + def process_content(&blk) + content.map do |group| + process_group(group, &blk) + end + end + + def process_group(group, &blk) + attributes = { + group: find_group_title_key(group['group']) + } + + group['metrics'].map do |metric| + process_metric(metric, attributes, &blk) + end + end + + def process_metric(metric, attributes, &blk) + attributes = attributes.merge( + title: metric['title'], + y_label: metric['y_label']) + + metric['queries'].map do |query| + process_metric_query(query, attributes, &blk) + end + end + + def process_metric_query(query, attributes, &blk) + attributes = attributes.merge( + legend: query['label'], + query: query['query_range'], + unit: query['unit']) + + blk.call(query['id'], attributes) + end + + def find_or_build_metric!(id) + raise MissingQueryId unless id + + PrometheusMetric.common.find_by(identifier: id) || + PrometheusMetric.new(common: true, identifier: id) + end + + def find_group_title_key(title) + PrometheusMetric.groups[find_group_title(title)] + end + + def find_group_title(title) + PrometheusMetric::GROUP_TITLES.invert[title] + end + end +end diff --git a/db/migrate/20180101160629_create_prometheus_metrics.rb b/db/migrate/20180101160629_create_prometheus_metrics.rb new file mode 100644 index 00000000000..da6371084cb --- /dev/null +++ b/db/migrate/20180101160629_create_prometheus_metrics.rb @@ -0,0 +1,16 @@ +class CreatePrometheusMetrics < ActiveRecord::Migration + DOWNTIME = false + + def change + create_table :prometheus_metrics do |t| + t.references :project, index: true, foreign_key: { on_delete: :cascade }, null: false + t.string :title, null: false + t.string :query, null: false + t.string :y_label + t.string :unit + t.string :legend + t.integer :group, null: false, index: true + t.timestamps_with_timezone null: false + end + end +end diff --git a/db/migrate/20180831164905_add_common_to_prometheus_metrics.rb b/db/migrate/20180831164905_add_common_to_prometheus_metrics.rb new file mode 100644 index 00000000000..43e36d2047c --- /dev/null +++ b/db/migrate/20180831164905_add_common_to_prometheus_metrics.rb @@ -0,0 +1,15 @@ +class AddCommonToPrometheusMetrics < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + disable_ddl_transaction! + + def up + add_column_with_default(:prometheus_metrics, :common, :boolean, default: false) + end + + def down + remove_column(:prometheus_metrics, :common) + end +end diff --git a/db/migrate/20180831164906_change_project_id_for_prometheus_metrics.rb b/db/migrate/20180831164906_change_project_id_for_prometheus_metrics.rb new file mode 100644 index 00000000000..c9aae868e52 --- /dev/null +++ b/db/migrate/20180831164906_change_project_id_for_prometheus_metrics.rb @@ -0,0 +1,11 @@ +class ChangeProjectIdForPrometheusMetrics < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + disable_ddl_transaction! + + def change + change_column_null :prometheus_metrics, :project_id, true + end +end diff --git a/db/migrate/20180831164907_add_index_on_default_prometheus_metrics.rb b/db/migrate/20180831164907_add_index_on_default_prometheus_metrics.rb new file mode 100644 index 00000000000..2e2014ccdce --- /dev/null +++ b/db/migrate/20180831164907_add_index_on_default_prometheus_metrics.rb @@ -0,0 +1,15 @@ +class AddIndexOnDefaultPrometheusMetrics < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + disable_ddl_transaction! + + def up + add_concurrent_index :prometheus_metrics, :common + end + + def down + remove_concurrent_index :prometheus_metrics, :project_id + end +end diff --git a/db/migrate/20180831164908_add_identifier_to_prometheus_metric.rb b/db/migrate/20180831164908_add_identifier_to_prometheus_metric.rb new file mode 100644 index 00000000000..d5f33a293cc --- /dev/null +++ b/db/migrate/20180831164908_add_identifier_to_prometheus_metric.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +class AddIdentifierToPrometheusMetric < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + + def change + add_column :prometheus_metrics, :identifier, :string, unique: true + end +end diff --git a/db/migrate/20180831164909_import_common_metrics.rb b/db/migrate/20180831164909_import_common_metrics.rb new file mode 100644 index 00000000000..adb35a9b211 --- /dev/null +++ b/db/migrate/20180831164909_import_common_metrics.rb @@ -0,0 +1,15 @@ +class ImportCommonMetrics < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + require_relative '../importers/common_metrics_importer.rb' + + DOWNTIME = false + + def up + Importers::CommonMetricsImporter.new.execute + end + + def down + # no-op + end +end diff --git a/db/schema.rb b/db/schema.rb index 02e545bec7d..c3d4eccd826 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -11,7 +11,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20180826111825) do +ActiveRecord::Schema.define(version: 20180831164909) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -1696,6 +1696,24 @@ ActiveRecord::Schema.define(version: 20180826111825) do add_index "projects", ["star_count"], name: "index_projects_on_star_count", using: :btree add_index "projects", ["visibility_level"], name: "index_projects_on_visibility_level", using: :btree + create_table "prometheus_metrics", force: :cascade do |t| + t.integer "project_id" + t.string "title", null: false + t.string "query", null: false + t.string "y_label" + t.string "unit" + t.string "legend" + t.integer "group", null: false + t.datetime_with_timezone "created_at", null: false + t.datetime_with_timezone "updated_at", null: false + t.boolean "common", default: false, null: false + t.string "identifier" + end + + add_index "prometheus_metrics", ["common"], name: "index_prometheus_metrics_on_common", using: :btree + add_index "prometheus_metrics", ["group"], name: "index_prometheus_metrics_on_group", using: :btree + add_index "prometheus_metrics", ["project_id"], name: "index_prometheus_metrics_on_project_id", using: :btree + create_table "protected_branch_merge_access_levels", force: :cascade do |t| t.integer "protected_branch_id", null: false t.integer "access_level", default: 40, null: false @@ -2375,6 +2393,7 @@ ActiveRecord::Schema.define(version: 20180826111825) do add_foreign_key "project_import_data", "projects", name: "fk_ffb9ee3a10", on_delete: :cascade add_foreign_key "project_mirror_data", "projects", on_delete: :cascade add_foreign_key "project_statistics", "projects", on_delete: :cascade + add_foreign_key "prometheus_metrics", "projects", on_delete: :cascade add_foreign_key "protected_branch_merge_access_levels", "protected_branches", name: "fk_8a3072ccb3", on_delete: :cascade add_foreign_key "protected_branch_push_access_levels", "protected_branches", name: "fk_9ffc86a3d9", on_delete: :cascade add_foreign_key "protected_branches", "projects", name: "fk_7a9c6d93e7", on_delete: :cascade diff --git a/doc/user/project/integrations/prometheus_library/metrics.md b/doc/user/project/integrations/prometheus_library/metrics.md index 96a22316265..1e0f9d4e249 100644 --- a/doc/user/project/integrations/prometheus_library/metrics.md +++ b/doc/user/project/integrations/prometheus_library/metrics.md @@ -20,6 +20,45 @@ GitLab uses the defined queries and fills in the environment specific variables. ## Adding to the library -We strive to support the 2-4 most important metrics for each common system service that supports Prometheus. If you are looking for support for a particular exporter which has not yet been added to the library, additions can be made [to the `additional_metrics.yml`](https://gitlab.com/gitlab-org/gitlab-ce/blob/master/config/prometheus/additional_metrics.yml) file. +We strive to support the 2-4 most important metrics for each common system service that supports Prometheus. If you are looking for support for a particular exporter which has not yet been added to the library, additions can be made [to the `common_metrics.yml`](https://gitlab.com/gitlab-org/gitlab-ce/blob/master/config/prometheus/common_metrics.yml) file. -> Note: The library is only for monitoring public, common, system services which all customers can benefit from. Support for monitoring [customer proprietary metrics](https://gitlab.com/gitlab-org/gitlab-ee/issues/2273) will be added in a subsequent release. +### Query identifier + +The requirement for adding metrics is to have each query to have unique identifier. +Identifier is used to update the metric later when changed. + +```yaml +- group: Response metrics (NGINX Ingress) + metrics: + - title: "Throughput" + y_label: "Requests / Sec" + queries: + - id: response_metrics_nginx_ingress_throughput_status_code + query_range: 'sum(rate(nginx_upstream_responses_total{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) by (status_code)' + unit: req / sec + label: Status Code +``` + +### Update existing metrics + +After you add or change existing _common_ metric you have to create a new database migration that will query and update all existing metrics. + +**Note: If a query metric (which is identified by `id:`) is removed it will not be removed from database by default.** +**You might want to add additional database migration that makes a decision what to do with removed one.** +**For example: you might be interested in migrating all dependent data to a different metric.** + +```ruby +class ImportCommonMetrics < ActiveRecord::Migration + require_relative '../importers/common_metrics_importer.rb' + + DOWNTIME = false + + def up + Importers::CommonMetricsImporter.new.execute + end + + def down + # no-op + end +end +``` diff --git a/lib/gitlab/prometheus/additional_metrics_parser.rb b/lib/gitlab/prometheus/additional_metrics_parser.rb index bb1172f82a1..a240d090074 100644 --- a/lib/gitlab/prometheus/additional_metrics_parser.rb +++ b/lib/gitlab/prometheus/additional_metrics_parser.rb @@ -5,7 +5,7 @@ module Gitlab MUTEX = Mutex.new extend self - def load_groups_from_yaml(file_name = 'additional_metrics.yml') + def load_groups_from_yaml(file_name) yaml_metrics_raw(file_name).map(&method(:group_from_entry)) end diff --git a/lib/gitlab/prometheus/metric_group.rb b/lib/gitlab/prometheus/metric_group.rb index e91c6fb2e27..d696a8fc00c 100644 --- a/lib/gitlab/prometheus/metric_group.rb +++ b/lib/gitlab/prometheus/metric_group.rb @@ -4,10 +4,13 @@ module Gitlab include ActiveModel::Model attr_accessor :name, :priority, :metrics + validates :name, :priority, :metrics, presence: true def self.common_metrics - AdditionalMetricsParser.load_groups_from_yaml + ::PrometheusMetric.common.group_by(&:group_title).map do |name, metrics| + MetricGroup.new(name: name, priority: 0, metrics: metrics.map(&:to_query_metric)) + end end # EE only diff --git a/spec/db/importers/common_metrics_importer_spec.rb b/spec/db/importers/common_metrics_importer_spec.rb new file mode 100644 index 00000000000..81eba6377c3 --- /dev/null +++ b/spec/db/importers/common_metrics_importer_spec.rb @@ -0,0 +1,119 @@ +require 'rails_helper' +require Rails.root.join("db", "importers", "common_metrics_importer.rb") + +describe Importers::PrometheusMetric do + it 'group enum equals ::PrometheusMetric' do + expect(described_class.groups).to eq(::PrometheusMetric.groups) + end + + it 'GROUP_TITLES equals ::PrometheusMetric' do + expect(described_class::GROUP_TITLES).to eq(::PrometheusMetric::GROUP_TITLES) + end +end + +describe Importers::CommonMetricsImporter do + subject { described_class.new } + + context "does import common_metrics.yml" do + let(:groups) { subject.content } + let(:metrics) { groups.map { |group| group['metrics'] }.flatten } + let(:queries) { metrics.map { |group| group['queries'] }.flatten } + let(:query_ids) { queries.map { |query| query['id'] } } + + before do + subject.execute + end + + it "has the same amount of groups" do + expect(PrometheusMetric.common.group(:group).count.count).to eq(groups.count) + end + + it "has the same amount of metrics" do + expect(PrometheusMetric.common.group(:group, :title).count.count).to eq(metrics.count) + end + + it "has the same amount of queries" do + expect(PrometheusMetric.common.count).to eq(queries.count) + end + + it "does not have duplicate IDs" do + expect(query_ids).to eq(query_ids.uniq) + end + + it "imports all IDs" do + expect(PrometheusMetric.common.pluck(:identifier)).to eq(query_ids) + end + end + + context 'does import properly all fields' do + let(:query_identifier) { 'response-metric' } + let(:group) do + { + group: 'Response metrics (NGINX Ingress)', + metrics: [{ + title: "Throughput", + y_label: "Requests / Sec", + queries: [{ + id: query_identifier, + query_range: 'my-query', + unit: 'my-unit', + label: 'status code' + }] + }] + } + end + + before do + expect(subject).to receive(:content) { [group.deep_stringify_keys] } + end + + shared_examples 'stores metric' do + let(:metric) { PrometheusMetric.find_by(identifier: query_identifier) } + + it 'with all data' do + expect(metric.group).to eq('nginx_ingress') + expect(metric.title).to eq('Throughput') + expect(metric.y_label).to eq('Requests / Sec') + expect(metric.unit).to eq('my-unit') + expect(metric.legend).to eq('status code') + expect(metric.query).to eq('my-query') + end + end + + context 'if ID is missing' do + let(:query_identifier) { } + + it 'raises exception' do + expect { subject.execute }.to raise_error(described_class::MissingQueryId) + end + end + + context 'for existing common metric with different ID' do + let!(:existing_metric) { create(:prometheus_metric, :common, identifier: 'my-existing-metric') } + + before do + subject.execute + end + + it_behaves_like 'stores metric' do + it 'and existing metric is not changed' do + expect(metric).not_to eq(existing_metric) + end + end + end + + context 'when metric with ID exists ' do + let!(:existing_metric) { create(:prometheus_metric, :common, identifier: 'response-metric') } + + before do + subject.execute + end + + it_behaves_like 'stores metric' do + it 'and existing metric is changed' do + expect(metric).to eq(existing_metric) + end + end + end + end +end
\ No newline at end of file diff --git a/spec/factories/prometheus_metrics.rb b/spec/factories/prometheus_metrics.rb new file mode 100644 index 00000000000..0ab237d6b3e --- /dev/null +++ b/spec/factories/prometheus_metrics.rb @@ -0,0 +1,16 @@ +FactoryBot.define do + factory :prometheus_metric, class: PrometheusMetric do + title 'title' + query 'avg(metric)' + y_label 'y_label' + unit 'm/s' + group :business + project + legend 'legend' + + trait :common do + common true + project nil + end + end +end diff --git a/spec/lib/gitlab/prometheus/metric_group_spec.rb b/spec/lib/gitlab/prometheus/metric_group_spec.rb new file mode 100644 index 00000000000..fa36b83f575 --- /dev/null +++ b/spec/lib/gitlab/prometheus/metric_group_spec.rb @@ -0,0 +1,22 @@ +require 'rails_helper' + +describe Gitlab::Prometheus::MetricGroup do + describe '.common_metrics' do + set(:project_metric) { create(:prometheus_metric) } + set(:common_metric_group_a) { create(:prometheus_metric, :common, group: :aws_elb) } + set(:common_metric_group_b_q1) { create(:prometheus_metric, :common, group: :kubernetes) } + set(:common_metric_group_b_q2) { create(:prometheus_metric, :common, group: :kubernetes) } + + subject { described_class.common_metrics } + + it 'returns exactly two groups' do + expect(subject.map(&:name)).to contain_exactly('Response metrics (AWS ELB)', 'System metrics (Kubernetes)') + end + + it 'returns exactly three metric queries' do + expect(subject.map(&:metrics).flatten.map(&:queries)).to contain_exactly( + common_metric_group_a.queries, common_metric_group_b_q1.queries, + common_metric_group_b_q2.queries) + end + end +end diff --git a/spec/migrations/import_common_metrics_spec.rb b/spec/migrations/import_common_metrics_spec.rb new file mode 100644 index 00000000000..83ecb39a6ee --- /dev/null +++ b/spec/migrations/import_common_metrics_spec.rb @@ -0,0 +1,14 @@ +require 'spec_helper' +require Rails.root.join('db', 'migrate', '20180831164909_import_common_metrics.rb') + +describe ImportCommonMetrics, :migration do + describe '#up' do + it "imports all prometheus metrics" do + expect(PrometheusMetric.common).to be_empty + + migrate! + + expect(PrometheusMetric.common).not_to be_empty + end + end +end diff --git a/spec/models/prometheus_metric_spec.rb b/spec/models/prometheus_metric_spec.rb new file mode 100644 index 00000000000..7469832898a --- /dev/null +++ b/spec/models/prometheus_metric_spec.rb @@ -0,0 +1,97 @@ +require 'spec_helper' + +describe PrometheusMetric do + subject { build(:prometheus_metric) } + let(:other_project) { build(:project) } + + it { is_expected.to belong_to(:project) } + it { is_expected.to validate_presence_of(:title) } + it { is_expected.to validate_presence_of(:query) } + it { is_expected.to validate_presence_of(:group) } + + describe 'common metrics' do + using RSpec::Parameterized::TableSyntax + + where(:common, :project, :result) do + false | other_project | true + false | nil | false + true | other_project | false + true | nil | true + end + + with_them do + before do + subject.common = common + subject.project = project + end + + it { expect(subject.valid?).to eq(result) } + end + end + + describe '#query_series' do + using RSpec::Parameterized::TableSyntax + + where(:legend, :type) do + 'Some other legend' | NilClass + 'Status Code' | Hash + end + + with_them do + before do + subject.legend = legend + end + + it { expect(subject.query_series).to be_a(type) } + end + end + + describe '#group_title' do + shared_examples 'group_title' do |group, title| + subject { build(:prometheus_metric, group: group).group_title } + + it "returns text #{title} for group #{group}" do + expect(subject).to eq(title) + end + end + + it_behaves_like 'group_title', :business, 'Business metrics (Custom)' + it_behaves_like 'group_title', :response, 'Response metrics (Custom)' + it_behaves_like 'group_title', :system, 'System metrics (Custom)' + end + + describe '#to_query_metric' do + it 'converts to queryable metric object' do + expect(subject.to_query_metric).to be_instance_of(Gitlab::Prometheus::Metric) + end + + it 'queryable metric object has title' do + expect(subject.to_query_metric.title).to eq(subject.title) + end + + it 'queryable metric object has y_label' do + expect(subject.to_query_metric.y_label).to eq(subject.y_label) + end + + it 'queryable metric has no required_metric' do + expect(subject.to_query_metric.required_metrics).to eq([]) + end + + it 'queryable metric has weight 0' do + expect(subject.to_query_metric.weight).to eq(0) + end + + it 'queryable metrics has query description' do + queries = [ + { + query_range: subject.query, + unit: subject.unit, + label: subject.legend, + series: nil + } + ] + + expect(subject.to_query_metric.queries).to eq(queries) + end + end +end |