summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Andrew Newdigate <andrew@gitlab.com> 2019-07-16 22:10:44 +0200
committer: Andrew Newdigate <andrew@gitlab.com> 2019-07-18 19:04:12 +0200
commit: dc14c91d065d869b77b0ec0db47b8b36c96f15be (patch)
tree: ada79dc72cdb0badc101a6034c3100c8beb0dae9
parent: f97a73fa39b48b6c3c770d609fcd9584d17221da (diff)
download: gitlab-ce-an-sidekiq-chaos.tar.gz
Adds chaos endpoints to Sidekiq [an-sidekiq-chaos]
This allows the chaos endpoints to be invoked in Sidekiq so that this environment can be tested for resilience.
-rw-r--r-- app/controllers/chaos_controller.rb 54
-rw-r--r-- app/workers/all_queues.yml 6
-rw-r--r-- app/workers/chaos/cpu_spin_worker.rb 12
-rw-r--r-- app/workers/chaos/db_spin_worker.rb 12
-rw-r--r-- app/workers/chaos/kill_worker.rb 12
-rw-r--r-- app/workers/chaos/leak_mem_worker.rb 12
-rw-r--r-- app/workers/chaos/sleep_worker.rb 12
-rw-r--r-- app/workers/concerns/chaos_queue.rb 9
-rw-r--r-- changelogs/unreleased/an-sidekiq-chaos.yml 5
-rw-r--r-- config/routes.rb 2
-rw-r--r-- config/sidekiq_queues.yml 1
-rw-r--r-- doc/development/chaos_endpoints.md 31
-rw-r--r-- lib/gitlab/chaos.rb 49
-rw-r--r-- spec/controllers/chaos_controller_spec.rb 127
14 files changed, 302 insertions, 42 deletions
diff --git a/app/controllers/chaos_controller.rb b/app/controllers/chaos_controller.rb
index 2985da35d83..ac008165c16 100644
--- a/app/controllers/chaos_controller.rb
+++ b/app/controllers/chaos_controller.rb
@@ -1,57 +1,38 @@
# frozen_string_literal: true
class ChaosController < ActionController::Base
- before_action :validate_chaos_secret, unless: :development?
- before_action :request_start_time
+ before_action :validate_chaos_secret, unless: :development_or_test?
def leakmem
- retainer = []
- # Add `n` 1mb chunks of memory to the retainer array
- memory_mb.times { retainer << "x" * 1.megabyte }
-
- Kernel.sleep(duration_left)
-
- render plain: "OK"
+ do_chaos :leak_mem, Chaos::LeakMemWorker, memory_mb, duration_s
end
def cpu_spin
- rand while Time.now < expected_end_time
-
- render plain: "OK"
+ do_chaos :cpu_spin, Chaos::CpuSpinWorker, duration_s
end
def db_spin
- while Time.now < expected_end_time
- ActiveRecord::Base.connection.execute("SELECT 1")
-
- end_interval_time = Time.now + [duration_s, interval_s].min
- rand while Time.now < end_interval_time
- end
+ do_chaos :db_spin, Chaos::DbSpinWorker, duration_s, interval_s
end
def sleep
- Kernel.sleep(duration_left)
-
- render plain: "OK"
+ do_chaos :sleep, Chaos::SleepWorker, duration_s
end
def kill
- Process.kill("KILL", Process.pid)
+ do_chaos :kill, Chaos::KillWorker
end
private
- def request_start_time
- @start_time ||= Time.now
- end
-
- def expected_end_time
- request_start_time + duration_s
- end
+ def do_chaos(method, worker, *args)
+ if async
+ worker.perform_async(*args)
+ else
+ Gitlab::Chaos.public_send(method, *args) # rubocop: disable GitlabSecurity/PublicSend
+ end
- def duration_left
- # returns 0 if over time
- [expected_end_time - Time.now, 0].max
+ render plain: "OK"
end
def validate_chaos_secret
@@ -91,7 +72,12 @@ class ChaosController < ActionController::Base
memory_mb.to_i
end
- def development?
- Rails.env.development?
+ def async
+ async = params[:async] || false
+ Gitlab::Utils.to_boolean(async)
+ end
+
+ def development_or_test?
+ Rails.env.development? || Rails.env.test?
end
end
diff --git a/app/workers/all_queues.yml b/app/workers/all_queues.yml
index 3d34bfc05c7..991a177018e 100644
--- a/app/workers/all_queues.yml
+++ b/app/workers/all_queues.yml
@@ -3,6 +3,12 @@
- auto_merge:auto_merge_process
+- chaos:chaos_cpu_spin
+- chaos:chaos_db_spin
+- chaos:chaos_kill
+- chaos:chaos_leak_mem
+- chaos:chaos_sleep
+
- cronjob:admin_email
- cronjob:expire_build_artifacts
- cronjob:gitlab_usage_ping
diff --git a/app/workers/chaos/cpu_spin_worker.rb b/app/workers/chaos/cpu_spin_worker.rb
new file mode 100644
index 00000000000..43a32c3274f
--- /dev/null
+++ b/app/workers/chaos/cpu_spin_worker.rb
@@ -0,0 +1,12 @@
+# frozen_string_literal: true
+
+module Chaos
+ class CpuSpinWorker
+ include ApplicationWorker
+ include ChaosQueue
+
+ def perform(duration_s)
+ Gitlab::Chaos.cpu_spin(duration_s)
+ end
+ end
+end
diff --git a/app/workers/chaos/db_spin_worker.rb b/app/workers/chaos/db_spin_worker.rb
new file mode 100644
index 00000000000..217ddabbcb6
--- /dev/null
+++ b/app/workers/chaos/db_spin_worker.rb
@@ -0,0 +1,12 @@
+# frozen_string_literal: true
+
+module Chaos
+ class DbSpinWorker
+ include ApplicationWorker
+ include ChaosQueue
+
+ def perform(duration_s, interval_s)
+ Gitlab::Chaos.db_spin(duration_s, interval_s)
+ end
+ end
+end
diff --git a/app/workers/chaos/kill_worker.rb b/app/workers/chaos/kill_worker.rb
new file mode 100644
index 00000000000..bbad53c9b86
--- /dev/null
+++ b/app/workers/chaos/kill_worker.rb
@@ -0,0 +1,12 @@
+# frozen_string_literal: true
+
+module Chaos
+ class KillWorker
+ include ApplicationWorker
+ include ChaosQueue
+
+ def perform
+ Gitlab::Chaos.kill
+ end
+ end
+end
diff --git a/app/workers/chaos/leak_mem_worker.rb b/app/workers/chaos/leak_mem_worker.rb
new file mode 100644
index 00000000000..0caa99e0de9
--- /dev/null
+++ b/app/workers/chaos/leak_mem_worker.rb
@@ -0,0 +1,12 @@
+# frozen_string_literal: true
+
+module Chaos
+ class LeakMemWorker
+ include ApplicationWorker
+ include ChaosQueue
+
+ def perform(memory_mb, duration_s)
+ Gitlab::Chaos.leak_mem(memory_mb, duration_s)
+ end
+ end
+end
diff --git a/app/workers/chaos/sleep_worker.rb b/app/workers/chaos/sleep_worker.rb
new file mode 100644
index 00000000000..7c724c4cb4e
--- /dev/null
+++ b/app/workers/chaos/sleep_worker.rb
@@ -0,0 +1,12 @@
+# frozen_string_literal: true
+
+module Chaos
+ class SleepWorker
+ include ApplicationWorker
+ include ChaosQueue
+
+ def perform(duration_s)
+ Gitlab::Chaos.sleep(duration_s)
+ end
+ end
+end
diff --git a/app/workers/concerns/chaos_queue.rb b/app/workers/concerns/chaos_queue.rb
new file mode 100644
index 00000000000..e406509d12d
--- /dev/null
+++ b/app/workers/concerns/chaos_queue.rb
@@ -0,0 +1,9 @@
+# frozen_string_literal: true
+#
+module ChaosQueue
+ extend ActiveSupport::Concern
+
+ included do
+ queue_namespace :chaos
+ end
+end
diff --git a/changelogs/unreleased/an-sidekiq-chaos.yml b/changelogs/unreleased/an-sidekiq-chaos.yml
new file mode 100644
index 00000000000..cede35c95cc
--- /dev/null
+++ b/changelogs/unreleased/an-sidekiq-chaos.yml
@@ -0,0 +1,5 @@
+---
+title: Adds chaos endpoints to Sidekiq
+merge_request: 30814
+author:
+type: other
diff --git a/config/routes.rb b/config/routes.rb
index 641807203bf..459f2b22bf0 100644
--- a/config/routes.rb
+++ b/config/routes.rb
@@ -116,7 +116,7 @@ Rails.application.routes.draw do
end
end
- if ENV['GITLAB_CHAOS_SECRET'] || Rails.env.development?
+ if ENV['GITLAB_CHAOS_SECRET'] || Rails.env.development? || Rails.env.test?
resource :chaos, only: [] do
get :leakmem
get :cpu_spin
diff --git a/config/sidekiq_queues.yml b/config/sidekiq_queues.yml
index 80791795390..c7586aa1e38 100644
--- a/config/sidekiq_queues.yml
+++ b/config/sidekiq_queues.yml
@@ -95,6 +95,7 @@
- [update_project_statistics, 1]
- [phabricator_import_import_tasks, 1]
- [update_namespace_statistics, 1]
+ - [chaos, 2]
# EE-specific queues
- [ldap_group_sync, 2]
diff --git a/doc/development/chaos_endpoints.md b/doc/development/chaos_endpoints.md
index b3406275937..eb6dde2d24e 100644
--- a/doc/development/chaos_endpoints.md
+++ b/doc/development/chaos_endpoints.md
@@ -36,6 +36,10 @@ Replace `secret` with your own secret token.
Once you have enabled the chaos endpoints and restarted the application, you can start testing using the endpoints.
+By default, when invoking a chaos endpoint, the web worker process which receives the request will handle it. This means, for example, that if the Kill
+operation is invoked, the Puma or Unicorn worker process handling the request will be killed. To test these operations in Sidekiq, the `async` parameter on
+each endpoint can be set to `true`. This will run the chaos process in a Sidekiq worker.
+
## Memory leaks
To simulate a memory leak in your application, use the `/-/chaos/leakmem` endpoint.
@@ -47,12 +51,14 @@ The memory is not retained after the request finishes. Once the request has comp
GET /-/chaos/leakmem
GET /-/chaos/leakmem?memory_mb=1024
GET /-/chaos/leakmem?memory_mb=1024&duration_s=50
+GET /-/chaos/leakmem?memory_mb=1024&duration_s=50&async=true
```
-| Attribute | Type | Required | Description |
-| ------------ | ------- | -------- | ---------------------------------------------------------------------------------- |
-| `memory_mb` | integer | no | How much memory, in MB, should be leaked. Defaults to 100MB. |
+| Attribute | Type | Required | Description |
+| ------------ | ------- | -------- | ------------------------------------------------------------------------------------ |
+| `memory_mb` | integer | no | How much memory, in MB, should be leaked. Defaults to 100MB. |
| `duration_s` | integer | no | Minimum duration, in seconds, that the memory should be retained. Defaults to 30s. |
+| `async` | boolean | no | Set to true to leak memory in a Sidekiq background worker process |
```bash
curl http://localhost:3000/-/chaos/leakmem?memory_mb=1024&duration_s=10 --header 'X-Chaos-Secret: secret'
@@ -69,11 +75,13 @@ If you're using Unicorn, this is done by killing the worker process.
```
GET /-/chaos/cpu_spin
GET /-/chaos/cpu_spin?duration_s=50
+GET /-/chaos/cpu_spin?duration_s=50&async=true
```
| Attribute | Type | Required | Description |
| ------------ | ------- | -------- | --------------------------------------------------------------------- |
| `duration_s` | integer | no | Duration, in seconds, that the core will be utilised. Defaults to 30s |
+| `async` | boolean | no | Set to true to consume CPU in a Sidekiq background worker process |
```bash
curl http://localhost:3000/-/chaos/cpu_spin?duration_s=60 --header 'X-Chaos-Secret: secret'
@@ -91,12 +99,14 @@ If you're using Unicorn, this is done by killing the worker process.
```
GET /-/chaos/db_spin
GET /-/chaos/db_spin?duration_s=50
+GET /-/chaos/db_spin?duration_s=50&async=true
```
-| Attribute | Type | Required | Description |
-| ------------ | ------- | -------- | --------------------------------------------------------------------- |
-| `interval_s` | float | no | Interval, in seconds, for every DB request. Defaults to 1s |
-| `duration_s` | integer | no | Duration, in seconds, that the core will be utilised. Defaults to 30s |
+| Attribute | Type | Required | Description |
+| ------------ | ------- | -------- | --------------------------------------------------------------------------- |
+| `interval_s` | float | no | Interval, in seconds, for every DB request. Defaults to 1s |
+| `duration_s` | integer | no | Duration, in seconds, that the core will be utilised. Defaults to 30s |
+| `async` | boolean | no | Set to true to perform the operation in a Sidekiq background worker process |
```bash
curl http://localhost:3000/-/chaos/db_spin?interval_s=1&duration_s=60 --header 'X-Chaos-Secret: secret'
@@ -112,11 +122,13 @@ As with the CPU Spin endpoint, this may lead to your request timing out if durat
```
GET /-/chaos/sleep
GET /-/chaos/sleep?duration_s=50
+GET /-/chaos/sleep?duration_s=50&async=true
```
| Attribute | Type | Required | Description |
| ------------ | ------- | -------- | ---------------------------------------------------------------------- |
| `duration_s` | integer | no | Duration, in seconds, that the request will sleep for. Defaults to 30s |
+| `async` | boolean | no | Set to true to sleep in a Sidekiq background worker process |
```bash
curl http://localhost:3000/-/chaos/sleep?duration_s=60 --header 'X-Chaos-Secret: secret'
@@ -132,8 +144,13 @@ Since this endpoint uses the `KILL` signal, the worker is not given a chance to
```
GET /-/chaos/kill
+GET /-/chaos/kill?async=true
```
+| Attribute | Type | Required | Description |
+| ------------ | ------- | -------- | ---------------------------------------------------------------------- |
+| `async` | boolean | no | Set to true to kill a Sidekiq background worker process |
+
```bash
curl http://localhost:3000/-/chaos/kill --header 'X-Chaos-Secret: secret'
curl http://localhost:3000/-/chaos/kill?token=secret
diff --git a/lib/gitlab/chaos.rb b/lib/gitlab/chaos.rb
new file mode 100644
index 00000000000..4f47cdef971
--- /dev/null
+++ b/lib/gitlab/chaos.rb
@@ -0,0 +1,49 @@
+# frozen_string_literal: true
+
+module Gitlab
+ # Chaos methods for GitLab.
+ # See https://docs.gitlab.com/ee/development/chaos_endpoints.html for more details.
+ class Chaos
+ # leak_mem will retain the specified amount of memory and sleep.
+ # On return, the memory will be released.
+ def self.leak_mem(memory_mb, duration_s)
+ start_time = Time.now
+
+ retainer = []
+ # Add `memory_mb` 1MB chunks of memory to the retainer array
+ memory_mb.times { retainer << "x" * 1.megabyte }
+
+ duration_left = [start_time + duration_s - Time.now, 0].max
+ Kernel.sleep(duration_left)
+ end
+
+ # cpu_spin will consume all CPU on a single core for the specified duration
+ def self.cpu_spin(duration_s)
+ expected_end_time = Time.now + duration_s
+
+ rand while Time.now < expected_end_time
+ end
+
+ # db_spin will query the database once per interval, busy-waiting on the CPU in between, for the specified duration
+ def self.db_spin(duration_s, interval_s)
+ expected_end_time = Time.now + duration_s
+
+ while Time.now < expected_end_time
+ ActiveRecord::Base.connection.execute("SELECT 1")
+
+ end_interval_time = Time.now + [duration_s, interval_s].min
+ rand while Time.now < end_interval_time
+ end
+ end
+
+ # sleep will sleep for the specified duration
+ def self.sleep(duration_s)
+ Kernel.sleep(duration_s)
+ end
+
+ # kill will send a SIGKILL signal to the current process
+ def self.kill
+ Process.kill("KILL", Process.pid)
+ end
+ end
+end
diff --git a/spec/controllers/chaos_controller_spec.rb b/spec/controllers/chaos_controller_spec.rb
new file mode 100644
index 00000000000..bafd4a70862
--- /dev/null
+++ b/spec/controllers/chaos_controller_spec.rb
@@ -0,0 +1,127 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+describe ChaosController do
+ describe '#leakmem' do
+ it 'calls synchronously' do
+ expect(Gitlab::Chaos).to receive(:leak_mem).with(100, 30.seconds)
+
+ get :leakmem
+
+ expect(response).to have_gitlab_http_status(200)
+ end
+
+ it 'calls synchronously with params' do
+ expect(Gitlab::Chaos).to receive(:leak_mem).with(1, 2.seconds)
+
+ get :leakmem, params: { memory_mb: 1, duration_s: 2 }
+
+ expect(response).to have_gitlab_http_status(200)
+ end
+
+ it 'calls asynchronously' do
+ expect(Chaos::LeakMemWorker).to receive(:perform_async).with(100, 30.seconds)
+
+ get :leakmem, params: { async: 1 }
+
+ expect(response).to have_gitlab_http_status(200)
+ end
+ end
+
+ describe '#cpu_spin' do
+ it 'calls synchronously' do
+ expect(Gitlab::Chaos).to receive(:cpu_spin).with(30.seconds)
+
+ get :cpu_spin
+
+ expect(response).to have_gitlab_http_status(200)
+ end
+
+ it 'calls synchronously with params' do
+ expect(Gitlab::Chaos).to receive(:cpu_spin).with(3.seconds)
+
+ get :cpu_spin, params: { duration_s: 3 }
+
+ expect(response).to have_gitlab_http_status(200)
+ end
+
+ it 'calls asynchronously' do
+ expect(Chaos::CpuSpinWorker).to receive(:perform_async).with(30.seconds)
+
+ get :cpu_spin, params: { async: 1 }
+
+ expect(response).to have_gitlab_http_status(200)
+ end
+ end
+
+ describe '#db_spin' do
+ it 'calls synchronously' do
+ expect(Gitlab::Chaos).to receive(:db_spin).with(30.seconds, 1.second)
+
+ get :db_spin
+
+ expect(response).to have_gitlab_http_status(200)
+ end
+
+ it 'calls synchronously with params' do
+ expect(Gitlab::Chaos).to receive(:db_spin).with(4.seconds, 5.seconds)
+
+ get :db_spin, params: { duration_s: 4, interval_s: 5 }
+
+ expect(response).to have_gitlab_http_status(200)
+ end
+
+ it 'calls asynchronously' do
+ expect(Chaos::DbSpinWorker).to receive(:perform_async).with(30.seconds, 1.second)
+
+ get :db_spin, params: { async: 1 }
+
+ expect(response).to have_gitlab_http_status(200)
+ end
+ end
+
+ describe '#sleep' do
+ it 'calls synchronously' do
+ expect(Gitlab::Chaos).to receive(:sleep).with(30.seconds)
+
+ get :sleep
+
+ expect(response).to have_gitlab_http_status(200)
+ end
+
+ it 'calls synchronously with params' do
+ expect(Gitlab::Chaos).to receive(:sleep).with(5.seconds)
+
+ get :sleep, params: { duration_s: 5 }
+
+ expect(response).to have_gitlab_http_status(200)
+ end
+
+ it 'calls asynchronously' do
+ expect(Chaos::SleepWorker).to receive(:perform_async).with(30.seconds)
+
+ get :sleep, params: { async: 1 }
+
+ expect(response).to have_gitlab_http_status(200)
+ end
+ end
+
+ describe '#kill' do
+ it 'calls synchronously' do
+ expect(Gitlab::Chaos).to receive(:kill).with(no_args)
+
+ get :kill
+
+ expect(response).to have_gitlab_http_status(200)
+ end
+
+ it 'calls asynchronously' do
+ expect(Chaos::KillWorker).to receive(:perform_async).with(no_args)
+
+ get :kill, params: { async: 1 }
+
+ expect(response).to have_gitlab_http_status(200)
+ end
+ end
+end