diff options
author | Nick Vatamaniuc <vatamane@gmail.com> | 2021-04-26 15:30:15 -0400 |
---|---|---|
committer | Nick Vatamaniuc <nickva@users.noreply.github.com> | 2021-04-27 14:32:15 -0400 |
commit | ff2242c5f90fd307eb5df52bc09952b1e580e3f2 (patch) | |
tree | d13c693274d881e38abae41aa78c7eb61817a73a | |
parent | d7011765673e649ac2adcde3db2beede920f40be (diff) | |
download | couchdb-ff2242c5f90fd307eb5df52bc09952b1e580e3f2.tar.gz |
Improve transaction retry behavior for _all_dbs, _dbs_info and _deleted_dbs
After recent improvements to how retries are handled in `fabric2_fdb`, Elixir
integration tests can often pass when running under "buggify" mode. The chance
of re-sending response data during retries is now much lower, too. However,
there are still some instances of that type of failure when running `_all_dbs`
tests. To trigger it would have to run the all_dbs test from basics_tests.exs a
few thousands times in a row. The reason for the failure is that retryable
errors might be still thrown during the `LayerPrefix = get_dir(Tx)` call, and
the whole transaction closure would be retried in [2]. When that happens,
user's callback is called twice with `meta` and it sends `[` twice in a row to
the client [3], which is incorrect.
A simple fix is to not call `meta` or `complete` from the transaction context.
That works because we don't pass the transaction object into user's callback
and the user won't be able to run another transaction in the same process
anyway.
There are already tests which test retriable errors in _all_dbsa and _dbs_info,
but they had been updated to only throw retriable errors when rows are emitted
to match the new behavior.
[0] https://github.com/apache/couchdb/commit/acb43e12fd7fddc6f606246875909f7c7df27324
[1] ```
ERL_ZFLAGS="-erlfdb network_options '[client_buggify_enable, {client_buggify_section_activated_probability, 35}, {client_buggify_section_fired_probability, 35}]'" make elixir tests=test/elixir/test/basics_test.exs:71
```
[2] https://github.com/apache/couchdb/blob/082f8078411aab7d71cc86afca0fe2eff3104b01/src/fabric/src/fabric2_db.erl#L279-L287
[3] https://github.com/apache/couchdb/blob/082f8078411aab7d71cc86afca0fe2eff3104b01/src/chttpd/src/chttpd_misc.erl#L137
-rw-r--r-- | src/fabric/src/fabric2_db.erl | 67 | ||||
-rw-r--r-- | src/fabric/test/fabric2_db_crud_tests.erl | 18 |
2 files changed, 41 insertions, 44 deletions
diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl index 0074865a0..a5de00ca0 100644 --- a/src/fabric/src/fabric2_db.erl +++ b/src/fabric/src/fabric2_db.erl @@ -275,20 +275,15 @@ list_dbs(UserFun, UserAcc0, Options) -> FoldFun = fun (DbName, Acc) -> maybe_stop(UserFun({row, [{id, DbName}]}, Acc)) end, - fabric2_fdb:transactional(fun(Tx) -> - try - UserAcc1 = maybe_stop(UserFun({meta, []}, UserAcc0)), - UserAcc2 = fabric2_fdb:list_dbs( - Tx, - FoldFun, - UserAcc1, - Options - ), - {ok, maybe_stop(UserFun(complete, UserAcc2))} - catch throw:{stop, FinalUserAcc} -> - {ok, FinalUserAcc} - end - end). + try + UserAcc1 = maybe_stop(UserFun({meta, []}, UserAcc0)), + UserAcc2 = fabric2_fdb:transactional(fun(Tx) -> + fabric2_fdb:list_dbs(Tx, FoldFun, UserAcc1, Options) + end), + {ok, maybe_stop(UserFun(complete, UserAcc2))} + catch throw:{stop, FinalUserAcc} -> + {ok, FinalUserAcc} + end. list_dbs_info() -> @@ -313,22 +308,22 @@ list_dbs_info(UserFun, UserAcc0, Options) -> NewFutureQ = queue:in({DbName, InfoFuture}, FutureQ), drain_info_futures(NewFutureQ, Count + 1, UserFun, Acc) end, - fabric2_fdb:transactional(fun(Tx) -> - try - UserAcc1 = maybe_stop(UserFun({meta, []}, UserAcc0)), - InitAcc = {queue:new(), 0, UserAcc1}, + try + UserAcc1 = maybe_stop(UserFun({meta, []}, UserAcc0)), + InitAcc = {queue:new(), 0, UserAcc1}, + UserAcc3 = fabric2_fdb:transactional(fun(Tx) -> {FinalFutureQ, _, UserAcc2} = fabric2_fdb:list_dbs_info( Tx, FoldFun, InitAcc, Options ), - UserAcc3 = drain_all_info_futures(FinalFutureQ, UserFun, UserAcc2), - {ok, maybe_stop(UserFun(complete, UserAcc3))} - catch throw:{stop, FinalUserAcc} -> - {ok, FinalUserAcc} - end - end). + drain_all_info_futures(FinalFutureQ, UserFun, UserAcc2) + end), + {ok, maybe_stop(UserFun(complete, UserAcc3))} + catch throw:{stop, FinalUserAcc} -> + {ok, FinalUserAcc} + end. list_deleted_dbs_info() -> @@ -390,26 +385,22 @@ list_deleted_dbs_info(UserFun, UserAcc0, Options0) -> NewFutureQ = queue:in({DbName, TimeStamp, InfoFuture}, FutureQ), drain_deleted_info_futures(NewFutureQ, Count + 1, UserFun, Acc) end, - fabric2_fdb:transactional(fun(Tx) -> - try - UserAcc1 = maybe_stop(UserFun({meta, []}, UserAcc0)), - InitAcc = {queue:new(), 0, UserAcc1}, + try + UserAcc1 = maybe_stop(UserFun({meta, []}, UserAcc0)), + InitAcc = {queue:new(), 0, UserAcc1}, + UserAcc3 = fabric2_fdb:transactional(fun(Tx) -> {FinalFutureQ, _, UserAcc2} = fabric2_fdb:list_deleted_dbs_info( Tx, FoldFun, InitAcc, Options2 ), - UserAcc3 = drain_all_deleted_info_futures( - FinalFutureQ, - UserFun, - UserAcc2 - ), - {ok, maybe_stop(UserFun(complete, UserAcc3))} - catch throw:{stop, FinalUserAcc} -> - {ok, FinalUserAcc} - end - end). + drain_all_deleted_info_futures(FinalFutureQ, UserFun, UserAcc2) + end), + {ok, maybe_stop(UserFun(complete, UserAcc3))} + catch throw:{stop, FinalUserAcc} -> + {ok, FinalUserAcc} + end. is_admin(Db, {SecProps}) when is_list(SecProps) -> diff --git a/src/fabric/test/fabric2_db_crud_tests.erl b/src/fabric/test/fabric2_db_crud_tests.erl index 3d90c65b5..ab157d881 100644 --- a/src/fabric/test/fabric2_db_crud_tests.erl +++ b/src/fabric/test/fabric2_db_crud_tests.erl @@ -449,9 +449,12 @@ list_dbs_tx_too_old(_) -> ?assertMatch({ok, _}, fabric2_db:create(DbName1, [])), ?assertMatch({ok, _}, fabric2_db:create(DbName2, [])), - UserFun = fun(Row, Acc) -> - fabric2_test_util:tx_too_old_raise_in_user_fun(), - {ok, [Row | Acc]} + UserFun = fun + ({row, _} = Row, Acc) -> + fabric2_test_util:tx_too_old_raise_in_user_fun(), + {ok, [Row | Acc]}; + (Row, Acc) -> + {ok, [Row | Acc]} end, % Get get expected output without any transactions timing out @@ -492,9 +495,12 @@ list_dbs_info_tx_too_old(_) -> DbName end, lists:seq(1, DbCount)), - UserFun = fun(Row, Acc) -> - fabric2_test_util:tx_too_old_raise_in_user_fun(), - {ok, [Row | Acc]} + UserFun = fun + ({row, _} = Row, Acc) -> + fabric2_test_util:tx_too_old_raise_in_user_fun(), + {ok, [Row | Acc]}; + (Row, Acc) -> + {ok, [Row | Acc]} end, % This is the expected return with no tx timeouts |