diff options
Diffstat (limited to 'spec/lib/gitlab/usage_data/topology_spec.rb')
-rw-r--r-- | spec/lib/gitlab/usage_data/topology_spec.rb | 562 |
1 files changed, 562 insertions, 0 deletions
# frozen_string_literal: true

require 'spec_helper'

# Exercises Gitlab::UsageData::Topology#topology_usage_data against a stubbed
# Prometheus client. Each `receive_*_query` helper at the bottom of this file
# builds a message expectation for one `query` call and returns canned samples
# unless a `result:` override is supplied.
RSpec.describe Gitlab::UsageData::Topology do
  include UsageDataHelpers

  describe '#topology_usage_data' do
    subject { described_class.new.topology_usage_data }

    # Evaluated lazily, i.e. only after an example has installed its stubs.
    let(:topology) { subject[:topology] }

    before do
      # this pins down time shifts when benchmarking durations
      allow(Process).to receive(:clock_gettime).and_return(0)
    end

    context 'when embedded Prometheus server is enabled' do
      before do
        expect(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_return(true)
        expect(Gitlab::Prometheus::Internal).to receive(:uri).and_return('http://prom:9090')
      end

      context 'tracking node metrics' do
        it 'contains node level metrics for each instance' do
          expect_prometheus_api_to(
            receive_app_request_volume_query,
            receive_node_memory_query,
            receive_node_cpu_count_query,
            receive_node_uname_info_query,
            receive_node_service_memory_rss_query,
            receive_node_service_memory_uss_query,
            receive_node_service_memory_pss_query,
            receive_node_service_process_count_query,
            receive_node_service_app_server_workers_query
          )

          first_node = {
            node_memory_total_bytes: 512,
            node_cpus: 8,
            node_uname_info: linux_uname('4.19.76-linuxkit'),
            node_services: [
              {
                name: 'web',
                process_count: 10,
                process_memory_rss: 300,
                process_memory_uss: 301,
                process_memory_pss: 302,
                server: 'puma'
              },
              { name: 'sidekiq', process_count: 5, process_memory_rss: 303 }
            ]
          }
          second_node = {
            node_memory_total_bytes: 1024,
            node_cpus: 16,
            node_uname_info: linux_uname('4.15.0-101-generic'),
            node_services: [
              { name: 'sidekiq', process_count: 15, process_memory_rss: 400, process_memory_pss: 401 },
              { name: 'redis', process_count: 1, process_memory_rss: 402 },
              { name: 'registry', process_count: 1 },
              { name: 'web', server: 'unicorn' }
            ]
          }

          expect(topology).to eq({
            duration_s: 0,
            application_requests_per_hour: 36,
            failures: [],
            nodes: [first_node, second_node]
          })
        end
      end

      context 'and some node memory metrics are missing' do
        it 'removes the respective entries and includes the failures' do
          expect_prometheus_api_to(
            receive_app_request_volume_query(result: []),
            receive_node_memory_query(result: []),
            receive_node_cpu_count_query,
            receive_node_uname_info_query,
            receive_node_service_memory_rss_query(result: []),
            receive_node_service_memory_uss_query(result: []),
            receive_node_service_memory_pss_query,
            receive_node_service_process_count_query,
            receive_node_service_app_server_workers_query(result: [])
          )

          sixteen_cpu_node = {
            node_cpus: 16,
            node_uname_info: linux_uname('4.15.0-101-generic'),
            node_services: [
              { name: 'sidekiq', process_count: 15, process_memory_pss: 401 },
              { name: 'redis', process_count: 1 },
              { name: 'registry', process_count: 1 }
            ]
          }
          eight_cpu_node = {
            node_cpus: 8,
            node_uname_info: linux_uname('4.19.76-linuxkit'),
            node_services: [
              { name: 'web', process_count: 10, process_memory_pss: 302 },
              { name: 'sidekiq', process_count: 5 }
            ]
          }

          expect(topology).to eq({
            duration_s: 0,
            failures: failures_for(
              'empty_result',
              'app_requests',
              'node_memory',
              'service_rss',
              'service_uss',
              'service_workers'
            ),
            nodes: [sixteen_cpu_node, eight_cpu_node]
          })
        end
      end

      context 'and services run on the same node but report different instance values' do
        let(:node_memory_response) do
          [prom_sample({ 'instance' => 'localhost:9100' }, '512')]
        end

        let(:node_uname_info_response) do
          [uname_sample('127.0.0.1', '4.19.76-linuxkit')]
        end

        # The services in this response should all be mapped to localhost i.e. the same node
        let(:service_memory_response) do
          [
            service_sample('localhost:8080', 'gitlab-rails', '10'),
            service_sample('127.0.0.1:8090', 'gitlab-sidekiq', '11'),
            service_sample('0.0.0.0:9090', 'prometheus', '12'),
            service_sample('[::1]:1234', 'redis', '13'),
            service_sample('[::]:1234', 'postgres', '14')
          ]
        end

        it 'normalizes equivalent instance values and maps them to the same node' do
          expect_prometheus_api_to(
            receive_app_request_volume_query(result: []),
            receive_node_memory_query(result: node_memory_response),
            receive_node_cpu_count_query(result: []),
            receive_node_uname_info_query(result: node_uname_info_response),
            receive_node_service_memory_rss_query(result: service_memory_response),
            receive_node_service_memory_uss_query(result: []),
            receive_node_service_memory_pss_query(result: []),
            receive_node_service_process_count_query(result: []),
            receive_node_service_app_server_workers_query(result: [])
          )

          single_node = {
            node_memory_total_bytes: 512,
            node_uname_info: linux_uname('4.19.76-linuxkit'),
            node_services: [
              { name: 'web', process_memory_rss: 10 },
              { name: 'sidekiq', process_memory_rss: 11 },
              { name: 'prometheus', process_memory_rss: 12 },
              { name: 'redis', process_memory_rss: 13 },
              { name: 'postgres', process_memory_rss: 14 }
            ]
          }

          expect(topology).to eq({
            duration_s: 0,
            failures: failures_for(
              'empty_result',
              'app_requests',
              'node_cpus',
              'service_uss',
              'service_pss',
              'service_process_count',
              'service_workers'
            ),
            nodes: [single_node]
          })
        end
      end

      context 'and node metrics are missing but service metrics exist' do
        it 'still reports service metrics' do
          expect_prometheus_api_to(
            receive_app_request_volume_query(result: []),
            receive_node_memory_query(result: []),
            receive_node_cpu_count_query(result: []),
            receive_node_uname_info_query(result: []),
            receive_node_service_memory_rss_query,
            receive_node_service_memory_uss_query(result: []),
            receive_node_service_memory_pss_query(result: []),
            receive_node_service_process_count_query(result: []),
            receive_node_service_app_server_workers_query(result: [])
          )

          expect(topology).to eq({
            duration_s: 0,
            failures: failures_for(
              'empty_result',
              'app_requests',
              'node_memory',
              'node_cpus',
              'node_uname_info',
              'service_uss',
              'service_pss',
              'service_process_count',
              'service_workers'
            ),
            nodes: [
              {
                node_services: [
                  { name: 'web', process_memory_rss: 300 },
                  { name: 'sidekiq', process_memory_rss: 303 }
                ]
              },
              {
                node_services: [
                  { name: 'sidekiq', process_memory_rss: 400 },
                  { name: 'redis', process_memory_rss: 402 }
                ]
              }
            ]
          })
        end
      end

      context 'and an error is raised when querying Prometheus' do
        it 'returns empty result with failures' do
          failing_query = receive(:query)
            .at_least(:once)
            .and_raise(Gitlab::PrometheusClient::ConnectionError)
          expect_prometheus_api_to failing_query

          expect(topology).to eq({
            duration_s: 0,
            failures: failures_for(
              'Gitlab::PrometheusClient::ConnectionError',
              'app_requests',
              'node_memory',
              'node_cpus',
              'node_uname_info',
              'service_rss',
              'service_uss',
              'service_pss',
              'service_process_count',
              'service_workers'
            ),
            nodes: []
          })
        end
      end
    end

    context 'when embedded Prometheus server is disabled' do
      it 'returns empty result with no failures' do
        expect(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_return(false)

        expect(topology).to eq({ duration_s: 0, failures: [] })
      end
    end

    context 'when top-level function raises error' do
      it 'returns empty result with generic failure' do
        allow(Gitlab::Prometheus::Internal).to receive(:prometheus_enabled?).and_raise(RuntimeError)

        expect(topology).to eq({
          duration_s: 0,
          failures: failures_for('RuntimeError', 'other')
        })
      end
    end
  end

  # --- expectation builders --------------------------------------------------

  # Builds the list of single-pair failure hashes (`{ query_name => reason }`),
  # one per query name, in the order given.
  def failures_for(reason, *query_names)
    query_names.map { |query_name| { query_name => reason } }
  end

  # Expected `node_uname_info` payload for an x86_64 Linux node with the given release.
  def linux_uname(release)
    { machine: 'x86_64', sysname: 'Linux', release: release }
  end

  # --- canned sample builders ------------------------------------------------

  # One query result entry: a label hash plus a [timestamp, value] pair.
  def prom_sample(labels, value)
    { 'metric' => labels, 'value' => [1000, value] }
  end

  # A sample labelled with `instance` and `job`, plus any extra labels.
  def service_sample(instance, job, value, extra_labels = {})
    prom_sample({ 'instance' => instance, 'job' => job }.merge(extra_labels), value)
  end

  # A `node_uname_info` sample for `host`, with the instance label on port 9100.
  def uname_sample(host, release)
    {
      'metric' => {
        '__name__' => 'node_uname_info',
        'domainname' => '(none)',
        'instance' => "#{host}:9100",
        'job' => 'node_exporter',
        'machine' => 'x86_64',
        'nodename' => host,
        'release' => release,
        'sysname' => 'Linux'
      },
      'value' => [1592463033.359, '1']
    }
  end

  # --- Prometheus query stubs ------------------------------------------------

  def receive_app_request_volume_query(result: nil)
    samples = result || [
      prom_sample({ 'component' => 'http_requests', 'service' => 'workhorse' }, '0.01')
    ]

    receive(:query)
      .with(/gitlab_usage_ping:ops:rate/)
      .and_return(samples)
  end

  def receive_node_memory_query(result: nil)
    samples = result || [
      prom_sample({ 'instance' => 'instance1:8080' }, '512'),
      prom_sample({ 'instance' => 'instance2:8090' }, '1024')
    ]

    receive(:query)
      .with(/node_memory_total_bytes/, an_instance_of(Hash))
      .and_return(samples)
  end

  def receive_node_cpu_count_query(result: nil)
    samples = result || [
      prom_sample({ 'instance' => 'instance2:8090' }, '16'),
      prom_sample({ 'instance' => 'instance1:8080' }, '8')
    ]

    receive(:query)
      .with(/node_cpus/, an_instance_of(Hash))
      .and_return(samples)
  end

  def receive_node_uname_info_query(result: nil)
    samples = result || [
      uname_sample('instance1', '4.19.76-linuxkit'),
      uname_sample('instance2', '4.15.0-101-generic')
    ]

    receive(:query)
      .with('node_uname_info')
      .and_return(samples)
  end

  def receive_node_service_memory_rss_query(result: nil)
    samples = result || [
      service_sample('instance1:8080', 'gitlab-rails', '300'),
      service_sample('instance1:8090', 'gitlab-sidekiq', '303'),
      # instance 2: runs a dedicated Sidekiq + Redis (which uses a different metric name)
      service_sample('instance2:8090', 'gitlab-sidekiq', '400'),
      service_sample('instance2:9121', 'redis', '402')
    ]

    receive(:query)
      .with(/process_resident_memory_bytes/, an_instance_of(Hash))
      .and_return(samples)
  end

  def receive_node_service_memory_uss_query(result: nil)
    samples = result || [
      service_sample('instance1:8080', 'gitlab-rails', '301')
    ]

    receive(:query)
      .with(/process_unique_memory_bytes/, an_instance_of(Hash))
      .and_return(samples)
  end

  def receive_node_service_memory_pss_query(result: nil)
    samples = result || [
      service_sample('instance1:8080', 'gitlab-rails', '302'),
      service_sample('instance2:8090', 'gitlab-sidekiq', '401')
    ]

    receive(:query)
      .with(/process_proportional_memory_bytes/, an_instance_of(Hash))
      .and_return(samples)
  end

  def receive_node_service_process_count_query(result: nil)
    samples = result || [
      # instance 1
      service_sample('instance1:8080', 'gitlab-rails', '10'),
      service_sample('instance1:8090', 'gitlab-sidekiq', '5'),
      # instance 2
      service_sample('instance2:8090', 'gitlab-sidekiq', '15'),
      service_sample('instance2:9121', 'redis', '1'),
      service_sample('instance2:8080', 'registry', '1'),
      # unknown service => should be stripped out
      service_sample('instance2:9000', 'not-a-gitlab-service', '42')
    ]

    receive(:query)
      .with(/service_process:count/, an_instance_of(Hash))
      .and_return(samples)
  end

  def receive_node_service_app_server_workers_query(result: nil)
    samples = result || [
      # instance 1
      service_sample('instance1:8080', 'gitlab-rails', '2', 'server' => 'puma'),
      # instance 2
      service_sample('instance2:8080', 'gitlab-rails', '1', 'server' => 'unicorn')
    ]

    receive(:query)
      .with(/app_server_workers/, an_instance_of(Hash))
      .and_return(samples)
  end
end