summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Vatamaniuc <vatamane@gmail.com>2021-04-26 15:30:15 -0400
committerNick Vatamaniuc <nickva@users.noreply.github.com>2021-04-27 14:32:15 -0400
commitff2242c5f90fd307eb5df52bc09952b1e580e3f2 (patch)
treed13c693274d881e38abae41aa78c7eb61817a73a
parentd7011765673e649ac2adcde3db2beede920f40be (diff)
downloadcouchdb-ff2242c5f90fd307eb5df52bc09952b1e580e3f2.tar.gz
Improve transaction retry behavior for _all_dbs, _dbs_info and _deleted_dbs
After recent improvements to how retries are handled in `fabric2_fdb`, Elixir integration tests can often pass when running under "buggify" mode. The chance of re-sending response data during retries is now much lower, too. However, there are still some instances of that type of failure when running `_all_dbs` tests. To trigger it would have to run the all_dbs test from basics_tests.exs a few thousands times in a row. The reason for the failure is that retryable errors might be still thrown during the `LayerPrefix = get_dir(Tx)` call, and the whole transaction closure would be retried in [2]. When that happens, user's callback is called twice with `meta` and it sends `[` twice in a row to the client [3], which is incorrect. A simple fix is to not call `meta` or `complete` from the transaction context. That works because we don't pass the transaction object into user's callback and the user won't be able to run another transaction in the same process anyway. There are already tests which test retriable errors in _all_dbsa and _dbs_info, but they had been updated to only throw retriable errors when rows are emitted to match the new behavior. [0] https://github.com/apache/couchdb/commit/acb43e12fd7fddc6f606246875909f7c7df27324 [1] ``` ERL_ZFLAGS="-erlfdb network_options '[client_buggify_enable, {client_buggify_section_activated_probability, 35}, {client_buggify_section_fired_probability, 35}]'" make elixir tests=test/elixir/test/basics_test.exs:71 ``` [2] https://github.com/apache/couchdb/blob/082f8078411aab7d71cc86afca0fe2eff3104b01/src/fabric/src/fabric2_db.erl#L279-L287 [3] https://github.com/apache/couchdb/blob/082f8078411aab7d71cc86afca0fe2eff3104b01/src/chttpd/src/chttpd_misc.erl#L137
-rw-r--r--src/fabric/src/fabric2_db.erl67
-rw-r--r--src/fabric/test/fabric2_db_crud_tests.erl18
2 files changed, 41 insertions, 44 deletions
diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl
index 0074865a0..a5de00ca0 100644
--- a/src/fabric/src/fabric2_db.erl
+++ b/src/fabric/src/fabric2_db.erl
@@ -275,20 +275,15 @@ list_dbs(UserFun, UserAcc0, Options) ->
FoldFun = fun
(DbName, Acc) -> maybe_stop(UserFun({row, [{id, DbName}]}, Acc))
end,
- fabric2_fdb:transactional(fun(Tx) ->
- try
- UserAcc1 = maybe_stop(UserFun({meta, []}, UserAcc0)),
- UserAcc2 = fabric2_fdb:list_dbs(
- Tx,
- FoldFun,
- UserAcc1,
- Options
- ),
- {ok, maybe_stop(UserFun(complete, UserAcc2))}
- catch throw:{stop, FinalUserAcc} ->
- {ok, FinalUserAcc}
- end
- end).
+ try
+ UserAcc1 = maybe_stop(UserFun({meta, []}, UserAcc0)),
+ UserAcc2 = fabric2_fdb:transactional(fun(Tx) ->
+ fabric2_fdb:list_dbs(Tx, FoldFun, UserAcc1, Options)
+ end),
+ {ok, maybe_stop(UserFun(complete, UserAcc2))}
+ catch throw:{stop, FinalUserAcc} ->
+ {ok, FinalUserAcc}
+ end.
list_dbs_info() ->
@@ -313,22 +308,22 @@ list_dbs_info(UserFun, UserAcc0, Options) ->
NewFutureQ = queue:in({DbName, InfoFuture}, FutureQ),
drain_info_futures(NewFutureQ, Count + 1, UserFun, Acc)
end,
- fabric2_fdb:transactional(fun(Tx) ->
- try
- UserAcc1 = maybe_stop(UserFun({meta, []}, UserAcc0)),
- InitAcc = {queue:new(), 0, UserAcc1},
+ try
+ UserAcc1 = maybe_stop(UserFun({meta, []}, UserAcc0)),
+ InitAcc = {queue:new(), 0, UserAcc1},
+ UserAcc3 = fabric2_fdb:transactional(fun(Tx) ->
{FinalFutureQ, _, UserAcc2} = fabric2_fdb:list_dbs_info(
Tx,
FoldFun,
InitAcc,
Options
),
- UserAcc3 = drain_all_info_futures(FinalFutureQ, UserFun, UserAcc2),
- {ok, maybe_stop(UserFun(complete, UserAcc3))}
- catch throw:{stop, FinalUserAcc} ->
- {ok, FinalUserAcc}
- end
- end).
+ drain_all_info_futures(FinalFutureQ, UserFun, UserAcc2)
+ end),
+ {ok, maybe_stop(UserFun(complete, UserAcc3))}
+ catch throw:{stop, FinalUserAcc} ->
+ {ok, FinalUserAcc}
+ end.
list_deleted_dbs_info() ->
@@ -390,26 +385,22 @@ list_deleted_dbs_info(UserFun, UserAcc0, Options0) ->
NewFutureQ = queue:in({DbName, TimeStamp, InfoFuture}, FutureQ),
drain_deleted_info_futures(NewFutureQ, Count + 1, UserFun, Acc)
end,
- fabric2_fdb:transactional(fun(Tx) ->
- try
- UserAcc1 = maybe_stop(UserFun({meta, []}, UserAcc0)),
- InitAcc = {queue:new(), 0, UserAcc1},
+ try
+ UserAcc1 = maybe_stop(UserFun({meta, []}, UserAcc0)),
+ InitAcc = {queue:new(), 0, UserAcc1},
+ UserAcc3 = fabric2_fdb:transactional(fun(Tx) ->
{FinalFutureQ, _, UserAcc2} = fabric2_fdb:list_deleted_dbs_info(
Tx,
FoldFun,
InitAcc,
Options2
),
- UserAcc3 = drain_all_deleted_info_futures(
- FinalFutureQ,
- UserFun,
- UserAcc2
- ),
- {ok, maybe_stop(UserFun(complete, UserAcc3))}
- catch throw:{stop, FinalUserAcc} ->
- {ok, FinalUserAcc}
- end
- end).
+ drain_all_deleted_info_futures(FinalFutureQ, UserFun, UserAcc2)
+ end),
+ {ok, maybe_stop(UserFun(complete, UserAcc3))}
+ catch throw:{stop, FinalUserAcc} ->
+ {ok, FinalUserAcc}
+ end.
is_admin(Db, {SecProps}) when is_list(SecProps) ->
diff --git a/src/fabric/test/fabric2_db_crud_tests.erl b/src/fabric/test/fabric2_db_crud_tests.erl
index 3d90c65b5..ab157d881 100644
--- a/src/fabric/test/fabric2_db_crud_tests.erl
+++ b/src/fabric/test/fabric2_db_crud_tests.erl
@@ -449,9 +449,12 @@ list_dbs_tx_too_old(_) ->
?assertMatch({ok, _}, fabric2_db:create(DbName1, [])),
?assertMatch({ok, _}, fabric2_db:create(DbName2, [])),
- UserFun = fun(Row, Acc) ->
- fabric2_test_util:tx_too_old_raise_in_user_fun(),
- {ok, [Row | Acc]}
+ UserFun = fun
+ ({row, _} = Row, Acc) ->
+ fabric2_test_util:tx_too_old_raise_in_user_fun(),
+ {ok, [Row | Acc]};
+ (Row, Acc) ->
+ {ok, [Row | Acc]}
end,
% Get get expected output without any transactions timing out
@@ -492,9 +495,12 @@ list_dbs_info_tx_too_old(_) ->
DbName
end, lists:seq(1, DbCount)),
- UserFun = fun(Row, Acc) ->
- fabric2_test_util:tx_too_old_raise_in_user_fun(),
- {ok, [Row | Acc]}
+ UserFun = fun
+ ({row, _} = Row, Acc) ->
+ fabric2_test_util:tx_too_old_raise_in_user_fun(),
+ {ok, [Row | Acc]};
+ (Row, Acc) ->
+ {ok, [Row | Acc]}
end,
% This is the expected return with no tx timeouts