summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Vatamaniuc <vatamane@apache.org>2021-05-10 01:52:05 -0400
committerNick Vatamaniuc <nickva@users.noreply.github.com>2021-05-10 11:40:32 -0400
commit96d3860daefdb65ceba472e99a7de1b0b7f591f6 (patch)
treec4b0fc6bf56cbdbac160dfad65ef370c50e00b50
parente69184a81b8188a04c836bc2816e00c7833295f5 (diff)
downloadcouchdb-96d3860daefdb65ceba472e99a7de1b0b7f591f6.tar.gz
Introduce buggify integration test mode
Add `buggify-elixir-suite` target to run Elixir integration tests under FoundationDB's client buggify mode [1]. In this mode, the FDB C client in the `erlfdb` application will periodically throw mostly retryable errors (`1009`, `1007`, etc). Transaction closures should properly handle retryable errors without side-effects such as re-sending response data to the user more than once, or attempting to re-read data from the socket after it was already read once. In order to avoid false positives, provide a custom .ini settings file which disables transaction timeouts (`1031` errors). Those are not retryable by default, as far as the `on_error` callback is concerned. If we do have timeouts set (e.g. `timeout = 60000`), it signals to the FoundationDB client that we expect to handle timeouts in buggify mode, so it starts throwing them [2]. Since we don't handle those everywhere, we get quite a few false positive errors. The buggify settings are, I believe, the defaults -- a 25% chance to activate an error, and a 25% chance of firing the error when the code passes over that section. In most test runs this should result in a pass, but sometimes, due to lingering bugs, there will be timeouts, 409 conflicts and other failures, so we cannot yet turn this into a reliable integration test step. [1] https://apple.github.io/foundationdb/client-testing.html [2] https://github.com/apple/foundationdb/blob/master/fdbclient/ReadYourWrites.actor.cpp#L1191-L1194
-rw-r--r--Makefile12
-rw-r--r--rel/files/buggify-eunit.config13
-rw-r--r--test/elixir/test/config/buggify-test-config.ini12
3 files changed, 37 insertions, 0 deletions
diff --git a/Makefile b/Makefile
index 79141184f..fc9011707 100644
--- a/Makefile
+++ b/Makefile
@@ -282,6 +282,18 @@ elixir-suite: elixir-init elixir-check-formatted elixir-credo devclean
--erlang-config rel/files/eunit.config \
--no-eval 'mix test --trace --include test/elixir/test/config/suite.elixir --exclude test/elixir/test/config/skip.elixir'
+.PHONY: buggify-elixir-suite
+buggify-elixir-suite: export MIX_ENV=integration
+buggify-elixir-suite: export COUCHDB_TEST_ADMIN_PARTY_OVERRIDE=1
+buggify-elixir-suite: elixir-init devclean
+ @dev/run -n 1 -q -a adm:pass \
+ --enable-erlang-views \
+ --no-join \
+ --locald-config test/elixir/test/config/test-config.ini \
+ --locald-config test/elixir/test/config/buggify-test-config.ini \
+ --erlang-config rel/files/buggify-eunit.config \
+ --no-eval 'mix test --trace --include test/elixir/test/config/suite.elixir --exclude test/elixir/test/config/skip.elixir'
+
.PHONY: elixir-check-formatted
elixir-check-formatted: elixir-init
@mix format --check-formatted
diff --git a/rel/files/buggify-eunit.config b/rel/files/buggify-eunit.config
new file mode 100644
index 000000000..61ccdaa0b
--- /dev/null
+++ b/rel/files/buggify-eunit.config
@@ -0,0 +1,13 @@
+[
+ {kernel, [{error_logger, silent}]},
+ {sasl, [{sasl_error_logger, false}]},
+ {fabric, [{eunit_run, true}]},
+ {erlfdb, [
+ {network_options, [
+ client_buggify_enable,
+ {client_buggify_section_activated_probability, 25},
+ {client_buggify_section_fired_probability, 25}
+ ]}
+ ]}
+].
+
diff --git a/test/elixir/test/config/buggify-test-config.ini b/test/elixir/test/config/buggify-test-config.ini
new file mode 100644
index 000000000..65274e728
--- /dev/null
+++ b/test/elixir/test/config/buggify-test-config.ini
@@ -0,0 +1,12 @@
+[fdb_tx_options]
+; If timeout is set i.e. > 0, client buggify may raise 1031
+; (transaction timeouts) which is not a retryable error. Since we do
+; not handle every single timeout properly but do set a transaction
+; timeout, we will get a lot of false positive errors
+timeout = 0
+
+; Do not set a retry limit to avoid getting false positive failures.
+retry_limit = -1
+
+[couch_views]
+indexer_tx_retry_limit = 1000