summary | refs | log | tree | commit | diff
path: root/config/prometheus/pod_metrics.yml
diff options
context:
space:
mode:
Diffstat (limited to 'config/prometheus/pod_metrics.yml')
-rw-r--r-- config/prometheus/pod_metrics.yml 106
1 files changed, 81 insertions, 25 deletions
diff --git a/config/prometheus/pod_metrics.yml b/config/prometheus/pod_metrics.yml
index 29575ec543e..b9395124405 100644
--- a/config/prometheus/pod_metrics.yml
+++ b/config/prometheus/pod_metrics.yml
@@ -1,59 +1,115 @@
-dashboard: 'Pod metrics'
-priority: 10
+dashboard: 'K8s pod health'
+priority: 2
+
+templating:
+ variables:
+ pod:
+ label: 'Pod name'
+ type: metric_label_values
+ options:
+ series_selector: 'container_memory_working_set_bytes'
+ label: 'pod'
+
panel_groups:
- group: CPU metrics
panels:
- title: "CPU usage"
type: "line-chart"
- y_label: "Cores per pod"
+ y_label: "Cores per container"
metrics:
- id: pod_cpu_usage_seconds_total
- query_range: 'rate(container_cpu_usage_seconds_total{pod_name="{{pod_name}}",container_name="POD"}[5m])'
+ query_range: >-
+ sum(
+ rate(container_cpu_usage_seconds_total{pod="{{pod}}",container!="POD"}[5m])
+ )
+ by (container)
unit: "cores"
- label: pod_name
+ label: container
+
+ - title: "CPU throttling"
+ type: "line-chart"
+ y_label: "Cores per container"
+ metrics:
+ - id: pod_cpu_cfs_throttle
+ query_range: >-
+ sum(
+ rate(container_cpu_cfs_throttled_seconds_total{pod="{{pod}}",container!="POD"}[5m])
+ )
+ by (container)
+ unit: "cores"
+ label: container
+
- group: Memory metrics
panels:
- title: "Memory usage working set"
type: "line-chart"
- y_label: "Working set memory (MiB)"
+ y_label: "Working set memory"
metrics:
- id: pod_memory_working_set
- query_range: 'container_memory_working_set_bytes{pod_name="{{pod_name}}",container_name="POD"}/1024/1024'
- unit: "MiB"
- label: pod_name
+ query_range: >-
+ sum(
+ container_memory_working_set_bytes{pod="{{pod}}",container!="POD"}
+ ) by (container)
+ unit: "bytes"
+ label: container
+
- group: Network metrics
panels:
- title: "Network Receive (In)"
type: "line-chart"
- y_label: "Received (KiB/sec)"
+ y_label: "Received (bytes/sec)"
metrics:
- id: pod_network_receive
- query_range: 'rate(container_network_receive_bytes_total{pod_name="{{pod_name}}",container_name="POD"}[5m])/1024'
- unit: "KiB / sec"
- label: pod_name
+ query_range: >-
+ sum(
+ rate(
+ container_network_receive_bytes_total{pod="{{pod}}"}[5m]
+ )
+ ) by (pod)
+ unit: "bytes / sec"
+ label: pod
+
- title: "Network Transmit (Out)"
type: "line-chart"
- y_label: "Transmitted (KiB/sec)"
+ y_label: "Transmitted (bytes/sec)"
metrics:
- id: pod_network_transmit
- query_range: 'rate(container_network_transmit_bytes_total{pod_name="{{pod_name}}",container_name="POD"}[5m])/1024'
- unit: "KiB / sec"
- label: pod_name
+ query_range: >-
+ sum(
+ rate(
+ container_network_transmit_bytes_total{pod="{{pod}}"}[5m]
+ )
+ ) by (pod)
+ unit: "bytes / sec"
+ label: pod
+
- group: Disk metrics
panels:
- title: "Disk Reads"
type: "line-chart"
- y_label: "Disk reads (KiB/sec)"
+ y_label: "Disk reads (bytes/sec)"
metrics:
- id: pod_disk_reads
- query_range: 'rate(container_fs_reads_bytes_total{container_name="POD",pod_name="{{pod_name}}"}[5m])/1024'
- unit: "KiB / sec"
- label: pod_name
+ query_range: >-
+ sum(
+ rate(
+ container_fs_reads_bytes_total{pod="{{pod}}", container!="POD"}[5m]
+ )
+ ) by (container,device)
+
+ unit: "bytes / sec"
+ label: "{{container}} {{device}}"
+
- title: "Disk Writes"
type: "line-chart"
- y_label: "Disk writes (KiB/sec)"
+ y_label: "Disk writes (bytes/sec)"
metrics:
- id: pod_disk_writes
- query_range: 'rate(container_fs_writes_bytes_total{container_name="POD",pod_name="{{pod_name}}"}[5m])/1024'
- unit: "KiB / sec"
- label: pod_name
+ query_range: >-
+ sum(
+ rate(
+ container_fs_writes_bytes_total{pod="{{pod}}", container!="POD"}[5m]
+ )
+ ) by (container,device)
+ unit: "bytes / sec"
+ label: "{{container}} {{device}}"