diff options
author | Nick Vatamaniuc <vatamane@apache.org> | 2021-05-10 01:52:05 -0400 |
---|---|---|
committer | Nick Vatamaniuc <nickva@users.noreply.github.com> | 2021-05-10 11:40:32 -0400 |
commit | 96d3860daefdb65ceba472e99a7de1b0b7f591f6 (patch) | |
tree | c4b0fc6bf56cbdbac160dfad65ef370c50e00b50 | |
parent | e69184a81b8188a04c836bc2816e00c7833295f5 (diff) | |
download | couchdb-96d3860daefdb65ceba472e99a7de1b0b7f591f6.tar.gz |
Introduce buggify integration test mode
Add `buggify-elixir-suite` target to run Elixir integration tests
under FoundationDB's client buggify mode [1]. In this mode, the FDB C
client in the `erlfdb` application will periodically throw mostly
retryable errors (`1009`, `1007`, etc). Transaction closures should
properly handle retryable errors without side-effects such as
re-sending response data to the user more than once or attempting to
re-read data from the socket after it was already read once.
In order to avoid false positives, provide a custom .ini settings file
which disables transaction timeouts (`1031` errors). Those are not
retryable by default, as far as the `on_error` callback is
concerned. If we do have timeouts set ( = 60000), it signals the
FoundationDB client that we expect to handle timeouts in buggify mode,
so it starts throwing them [2]. Since we don't handle those everywhere
we get quite a few false positive errors.
Buggify settings I believe are the default -- 25% chance to activate
an error, and 25% chance of firing the error when the code passes over
that section. In most test runs this should result in a pass, but
sometimes, due to lingering bugs, there will be timeouts, 409
conflicts and other failures so we cannot yet turn this into a
reliable integration test step.
[1] https://apple.github.io/foundationdb/client-testing.html
[2] https://github.com/apple/foundationdb/blob/master/fdbclient/ReadYourWrites.actor.cpp#L1191-L1194
-rw-r--r-- | Makefile | 12 | ||||
-rw-r--r-- | rel/files/buggify-eunit.config | 13 | ||||
-rw-r--r-- | test/elixir/test/config/buggify-test-config.ini | 12 |
3 files changed, 37 insertions, 0 deletions
@@ -282,6 +282,18 @@ elixir-suite: elixir-init elixir-check-formatted elixir-credo devclean --erlang-config rel/files/eunit.config \ --no-eval 'mix test --trace --include test/elixir/test/config/suite.elixir --exclude test/elixir/test/config/skip.elixir' +.PHONY: buggify-elixir-suite +buggify-elixir-suite: export MIX_ENV=integration +buggify-elixir-suite: export COUCHDB_TEST_ADMIN_PARTY_OVERRIDE=1 +buggify-elixir-suite: elixir-init devclean + @dev/run -n 1 -q -a adm:pass \ + --enable-erlang-views \ + --no-join \ + --locald-config test/elixir/test/config/test-config.ini \ + --locald-config test/elixir/test/config/buggify-test-config.ini \ + --erlang-config rel/files/buggify-eunit.config \ + --no-eval 'mix test --trace --include test/elixir/test/config/suite.elixir --exclude test/elixir/test/config/skip.elixir' + .PHONY: elixir-check-formatted elixir-check-formatted: elixir-init @mix format --check-formatted diff --git a/rel/files/buggify-eunit.config b/rel/files/buggify-eunit.config new file mode 100644 index 000000000..61ccdaa0b --- /dev/null +++ b/rel/files/buggify-eunit.config @@ -0,0 +1,13 @@ +[ + {kernel, [{error_logger, silent}]}, + {sasl, [{sasl_error_logger, false}]}, + {fabric, [{eunit_run, true}]}, + {erlfdb, [ + {network_options, [ + client_buggify_enable, + {client_buggify_section_activated_probability, 25}, + {client_buggify_section_fired_probability, 25} + ]} + ]} +]. + diff --git a/test/elixir/test/config/buggify-test-config.ini b/test/elixir/test/config/buggify-test-config.ini new file mode 100644 index 000000000..65274e728 --- /dev/null +++ b/test/elixir/test/config/buggify-test-config.ini @@ -0,0 +1,12 @@ +[fdb_tx_options] +; If timeout is set i.e. > 0, client buggify may raise 1031 +; (transaction timeouts) which is not a retryable error. 
Since we do +; not handle every single timeout properly but do set a transaction +; timeout, we will get a lot of false positive errors +timeout = 0 + +; Do not set a retry limit to avoid getting false positive failures. +retry_limit = -1 + +[couch_views] +indexer_tx_retry_limit = 1000 |