Merge branch '37644-update-prometheus-metric-library-to-support-multiple-series-and-colors' into 'master'

Update Prometheus Metric Library to support multiple series and colors Closes #37644 See merge request !14155
author: Tim Zallmann <tzallmann@gitlab.com> 2017-09-11 12:56:32 +0000
committer: Tim Zallmann <tzallmann@gitlab.com> 2017-09-11 12:56:32 +0000
commit: 9321a6cdc879b15eeb6e57506d4a4233f49afaae (patch)
tree: 7148bfd593de6437816f8d0cd8f47818285c1b19
parent: eeacda2297131067d0f0e7c5801d6700175520f9 (diff)
parent: 9297377653d9b28607f454cd548af028a325725e (diff)
download: gitlab-ce-9321a6cdc879b15eeb6e57506d4a4233f49afaae.tar.gz
5 files changed, 60 insertions, 18 deletions
diff --git a/config/prometheus/additional_metrics.yml b/config/prometheus/additional_metrics.yml
index 0642a0b2fe9..33b897f46e2 100644
--- a/config/prometheus/additional_metrics.yml
+++ b/config/prometheus/additional_metrics.yml
@@ -4,12 +4,21 @@
   - title: "Throughput"
     y_label: "Requests / Sec"
     required_metrics:
-      - nginx_upstream_requests_total
+      - nginx_upstream_responses_total
     weight: 1
     queries:
-    - query_range: 'sum(rate(nginx_upstream_requests_total{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m]))'
-      label: Total
+    - query_range: 'sum(rate(nginx_upstream_responses_total{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) by (status_code)'
       unit: req / sec
+      label: Status Code
+      series:
+        - label: status_code
+          when:
+            - value: 2xx
+              color: green
+            - value: 4xx
+              color: orange
+            - value: 5xx
+              color: red
   - title: "Latency"
     y_label: "Latency (ms)"
     required_metrics:
@@ -37,9 +46,17 @@
       - haproxy_frontend_http_requests_total
     weight: 1
     queries:
-    - query_range: 'sum(rate(haproxy_frontend_http_requests_total{%{environment_filter}}[2m]))'
-      label: Total
+    - query_range: 'sum(rate(haproxy_frontend_http_requests_total{%{environment_filter}}[2m])) by (code)'
       unit: req / sec
+      series:
+        - label: code
+          when:
+            - value: 2xx
+              color: green
+            - value: 4xx
+              color: yellow
+            - value: 5xx
+              color: red
   - title: "HTTP Error Rate"
     y_label: "Error Rate (%)"
     required_metrics:
@@ -86,12 +103,21 @@
   - title: "Throughput"
     y_label: "Requests / Sec"
     required_metrics:
-      - nginx_requests_total
+      - nginx_responses_total
     weight: 1
     queries:
-    - query_range: 'sum(rate(nginx_requests_total{server_zone!="*", server_zone!="_", %{environment_filter}}[2m]))'
-      label: Total
+    - query_range: 'sum(rate(nginx_responses_total{server_zone!="*", server_zone!="_", %{environment_filter}}[2m])) by (status_code)'
       unit: req / sec
+      label: Status Code
+      series:
+        - label: status_code
+          when:
+            - value: 2xx
+              color: green
+            - value: 4xx
+              color: orange
+            - value: 5xx
+              color: red
   - title: "Latency"
     y_label: "Latency (ms)"
     required_metrics:
@@ -128,6 +154,8 @@
      - container_cpu_usage_seconds_total
     weight: 1
     queries:
-    - query_range: 'sum(rate(container_cpu_usage_seconds_total{container_name!="POD",%{environment_filter}}[2m])) / count(container_cpu_usage_seconds_total{container_name!="POD",%{environment_filter}}) * 100'
-      label: Average
+    - query_range: 'sum(rate(container_cpu_usage_seconds_total{container_name!="POD",%{environment_filter}}[2m])) by (cpu)  * 100'
+      label: CPU
       unit: "%"
+      series:
+        - label: cpu
diff --git a/doc/user/project/integrations/prometheus_library/haproxy.md b/doc/user/project/integrations/prometheus_library/haproxy.md
index f2939f047a3..d4b5911a91c 100644
--- a/doc/user/project/integrations/prometheus_library/haproxy.md
+++ b/doc/user/project/integrations/prometheus_library/haproxy.md
@@ -7,7 +7,7 @@ GitLab has support for automatically detecting and monitoring HAProxy. This is p
 
 | Name | Query |
 | ---- | ----- |
-| Throughput (req/sec) | sum(rate(haproxy_frontend_http_requests_total{%{environment_filter}}[2m])) |
+| Throughput (req/sec) | sum(rate(haproxy_frontend_http_requests_total{%{environment_filter}}[2m])) by (code) |
 | HTTP Error Rate (%) | sum(rate(haproxy_frontend_http_requests_total{code="5xx",%{environment_filter}}[2m])) / sum(rate(haproxy_frontend_http_requests_total{%{environment_filter}}[2m])) |
 
 ## Configuring Prometheus to monitor for HAProxy metrics
diff --git a/doc/user/project/integrations/prometheus_library/kubernetes.md b/doc/user/project/integrations/prometheus_library/kubernetes.md
index 9f0308d8111..4d39ae0c4fa 100644
--- a/doc/user/project/integrations/prometheus_library/kubernetes.md
+++ b/doc/user/project/integrations/prometheus_library/kubernetes.md
@@ -8,7 +8,7 @@ GitLab has support for automatically detecting and monitoring Kubernetes metrics
 | Name | Query |
 | ---- | ----- |
 | Average Memory Usage (MB) | (sum(container_memory_usage_bytes{container_name!="POD",%{environment_filter}}) / count(container_memory_usage_bytes{container_name!="POD",%{environment_filter}})) /1024/1024 |
-| Average CPU Utilization (%) | sum(rate(container_cpu_usage_seconds_total{container_name!="POD",%{environment_filter}}[2m])) / count(container_cpu_usage_seconds_total{container_name!="POD",%{environment_filter}}) * 100 |
+| Average CPU Utilization (%) | sum(rate(container_cpu_usage_seconds_total{container_name!="POD",%{environment_filter}}[2m])) by (cpu)  * 100 |
 
 ## Configuring Prometheus to monitor for Kubernetes node metrics
 
diff --git a/doc/user/project/integrations/prometheus_library/nginx.md b/doc/user/project/integrations/prometheus_library/nginx.md
index 12e3321f5f3..bab22f9a384 100644
--- a/doc/user/project/integrations/prometheus_library/nginx.md
+++ b/doc/user/project/integrations/prometheus_library/nginx.md
@@ -7,7 +7,7 @@ GitLab has support for automatically detecting and monitoring NGINX. This is pro
 
 | Name | Query |
 | ---- | ----- |
-| Throughput (req/sec) | sum(rate(nginx_requests_total{server_zone!="*", server_zone!="_", %{environment_filter}}[2m])) |
+| Throughput (req/sec) | sum(rate(nginx_responses_total{server_zone!="*", server_zone!="_", %{environment_filter}}[2m])) by (status_code) |
 | Latency (ms) | avg(nginx_upstream_response_msecs_avg{%{environment_filter}}) |
 | HTTP Error Rate (HTTP Errors / sec) | sum(rate(nginx_responses_total{status_code="5xx", %{environment_filter}}[2m])) |
 
diff --git a/doc/user/project/integrations/prometheus_library/nginx_ingress.md b/doc/user/project/integrations/prometheus_library/nginx_ingress.md
index 84ee8bc45e5..2a37cbd160b 100644
--- a/doc/user/project/integrations/prometheus_library/nginx_ingress.md
+++ b/doc/user/project/integrations/prometheus_library/nginx_ingress.md
@@ -7,19 +7,33 @@ GitLab has support for automatically detecting and monitoring the Kubernetes NGI
 
 | Name | Query |
 | ---- | ----- |
-| Throughput (req/sec) | sum(rate(nginx_upstream_requests_total{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) |
+| Throughput (req/sec) | sum(rate(nginx_upstream_responses_total{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) by (status_code) |
 | Latency (ms) | avg(nginx_upstream_response_msecs_avg{upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}) |
 | HTTP Error Rate (HTTP Errors / sec) | sum(rate(nginx_upstream_responses_total{status_code="5xx", upstream=~"%{kube_namespace}-%{ci_environment_slug}-.*"}[2m])) |
 
 ## Configuring Prometheus to monitor for NGINX ingress metrics
 
-The easiest way to get started is to use at least version 0.9.0 of [NGINX ingress](https://github.com/kubernetes/ingress/tree/master/controllers/nginx). If you are using NGINX as your Kubernetes ingress, there is [direct support](https://github.com/kubernetes/ingress/pull/423) for enabling Prometheus monitoring in the 0.9.0 release.
+If you have deployed with the [gitlab-omnibus](https://docs.gitlab.com/ee/install/kubernetes/gitlab_omnibus.md) Helm chart, and your application is running in the same cluster, no further action is required. The ingress metrics will be automatically enabled and annotated for Prometheus monitoring. Simply ensure Prometheus monitoring is [enabled for your project](../prometheus.md), which is on by default.
 
-If you have deployed with the [gitlab-omnibus](https://docs.gitlab.com/ee/install/kubernetes/gitlab_omnibus.md) Helm chart, these metrics will be automatically enabled and annotated for Prometheus monitoring.
+For other deployments, there is some configuration required depending on your installation:
+* NGINX Ingress should be version 0.9.0 or above
+* NGINX Ingress should be annotated for Prometheus monitoring
+* Prometheus should be configured to monitor annotated pods
+
+### Configuring NGINX Ingress for Prometheus monitoring
+
+Version 0.9.0 and above of [NGINX ingress](https://github.com/kubernetes/ingress/tree/master/controllers/nginx) have built-in support for exporting Prometheus metrics. To enable, a ConfigMap setting must be passed: `enable-vts-status: "true"`. Once enabled, a Prometheus metrics endpoint will start running on port 10254.
+
+With metric data now available, Prometheus needs to be configured to collect it. The easiest way to do this is to leverage Prometheus' [built-in Kubernetes service discovery](https://prometheus.io/docs/operating/configuration/#kubernetes_sd_config), which automatically detects a variety of Kubernetes components and makes them available for monitoring. NGINX ingress metrics are exposed per pod; a sample scrape configuration [is available](https://github.com/prometheus/prometheus/blob/master/documentation/examples/prometheus-kubernetes.yml#L248). This configuration will detect pods and enable collection of metrics **only if** they have been specifically annotated for monitoring.
+
+Depending on how NGINX ingress was deployed, typically a DaemonSet or Deployment, edit the corresponding YML spec. Two new annotations need to be added:
+* `prometheus.io/scrape: "true"`
+* `prometheus.io/port: "10254"`
+
+Prometheus should now be collecting NGINX ingress metrics. To validate, view the Prometheus Targets, available under `Status > Targets` on the Prometheus dashboard. New entries for NGINX should be listed in the Kubernetes pod monitoring job, `kubernetes-pods`.
 
 ## Specifying the Environment label
 
-In order to isolate and only display relevant metrics for a given environment
-however, GitLab needs a method to detect which labels are associated. To do this, GitLab will search metrics with appropriate labels. In this case, the `upstream` label must be of the form `<Kubernetes Namespace>-<CI_ENVIRONMENT_SLUG>-*`.
+In order to isolate and only display relevant metrics for a given environment, GitLab needs a method to detect which labels are associated. To do this, GitLab will search for metrics with appropriate labels. In this case, the `upstream` label must be of the form `<KUBE_NAMESPACE>-<CI_ENVIRONMENT_SLUG>-*`.
 
 If you have used [Auto Deploy](https://docs.gitlab.com/ee/ci/autodeploy/index.html) to deploy your app, this format will be used automatically and metrics will be detected with no action on your part.
author	Tim Zallmann <tzallmann@gitlab.com>	2017-09-11 12:56:32 +0000
committer	Tim Zallmann <tzallmann@gitlab.com>	2017-09-11 12:56:32 +0000
commit	9321a6cdc879b15eeb6e57506d4a4233f49afaae (patch)
tree	7148bfd593de6437816f8d0cd8f47818285c1b19
parent	eeacda2297131067d0f0e7c5801d6700175520f9 (diff)
parent	9297377653d9b28607f454cd548af028a325725e (diff)
download	gitlab-ce-9321a6cdc879b15eeb6e57506d4a4233f49afaae.tar.gz