summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Vatamaniuc <vatamane@gmail.com>2021-04-26 15:30:15 -0400
committerNick Vatamaniuc <vatamane@gmail.com>2021-04-27 12:56:02 -0400
commit2d574b070ae03b4736c1cdb5b2af42e1041034bb (patch)
treed13c693274d881e38abae41aa78c7eb61817a73a
parent10be4b93b994315479029f7f7708d58420aa5e4b (diff)
downloadcouchdb-improve-retry-handling-in-dbs-endpoints.tar.gz
Improve transaction retry behavior for _all_dbs, _dbs_info and _deleted_dbsimprove-retry-handling-in-dbs-endpoints
After recent improvements to how retries are handled in `fabric2_fdb`, Elixir integration tests can often pass when running under "buggify" mode. The chance of re-sending response data during retries is now much lower, too. However, there are still some instances of that type of failure when running `_all_dbs` tests. To trigger it would have to run the all_dbs test from basics_tests.exs a few thousands times in a row. The reason for the failure is that retryable errors might be still thrown during the `LayerPrefix = get_dir(Tx)` call, and the whole transaction closure would be retried in [2]. When that happens, user's callback is called twice with `meta` and it sends `[` twice in a row to the client [3], which is incorrect. A simple fix is to not call `meta` or `complete` from the transaction context. That works because we don't pass the transaction object into user's callback and the user won't be able to run another transaction in the same process anyway. There are already tests which test retriable errors in _all_dbsa and _dbs_info, but they had been updated to only throw retriable errors when rows are emitted to match the new behavior. [0] https://github.com/apache/couchdb/commit/acb43e12fd7fddc6f606246875909f7c7df27324 [1] ``` ERL_ZFLAGS="-erlfdb network_options '[client_buggify_enable, {client_buggify_section_activated_probability, 35}, {client_buggify_section_fired_probability, 35}]'" make elixir tests=test/elixir/test/basics_test.exs:71 ``` [2] https://github.com/apache/couchdb/blob/082f8078411aab7d71cc86afca0fe2eff3104b01/src/fabric/src/fabric2_db.erl#L279-L287 [3] https://github.com/apache/couchdb/blob/082f8078411aab7d71cc86afca0fe2eff3104b01/src/chttpd/src/chttpd_misc.erl#L137
-rw-r--r--src/fabric/src/fabric2_db.erl67
-rw-r--r--src/fabric/test/fabric2_db_crud_tests.erl18
2 files changed, 41 insertions, 44 deletions
diff --git a/src/fabric/src/fabric2_db.erl b/src/fabric/src/fabric2_db.erl
index 0074865a0..a5de00ca0 100644
--- a/src/fabric/src/fabric2_db.erl
+++ b/src/fabric/src/fabric2_db.erl
@@ -275,20 +275,15 @@ list_dbs(UserFun, UserAcc0, Options) ->
FoldFun = fun
(DbName, Acc) -> maybe_stop(UserFun({row, [{id, DbName}]}, Acc))
end,
- fabric2_fdb:transactional(fun(Tx) ->
- try
- UserAcc1 = maybe_stop(UserFun({meta, []}, UserAcc0)),
- UserAcc2 = fabric2_fdb:list_dbs(
- Tx,
- FoldFun,
- UserAcc1,
- Options
- ),
- {ok, maybe_stop(UserFun(complete, UserAcc2))}
- catch throw:{stop, FinalUserAcc} ->
- {ok, FinalUserAcc}
- end
- end).
+ try
+ UserAcc1 = maybe_stop(UserFun({meta, []}, UserAcc0)),
+ UserAcc2 = fabric2_fdb:transactional(fun(Tx) ->
+ fabric2_fdb:list_dbs(Tx, FoldFun, UserAcc1, Options)
+ end),
+ {ok, maybe_stop(UserFun(complete, UserAcc2))}
+ catch throw:{stop, FinalUserAcc} ->
+ {ok, FinalUserAcc}
+ end.
list_dbs_info() ->
@@ -313,22 +308,22 @@ list_dbs_info(UserFun, UserAcc0, Options) ->
NewFutureQ = queue:in({DbName, InfoFuture}, FutureQ),
drain_info_futures(NewFutureQ, Count + 1, UserFun, Acc)
end,
- fabric2_fdb:transactional(fun(Tx) ->
- try
- UserAcc1 = maybe_stop(UserFun({meta, []}, UserAcc0)),
- InitAcc = {queue:new(), 0, UserAcc1},
+ try
+ UserAcc1 = maybe_stop(UserFun({meta, []}, UserAcc0)),
+ InitAcc = {queue:new(), 0, UserAcc1},
+ UserAcc3 = fabric2_fdb:transactional(fun(Tx) ->
{FinalFutureQ, _, UserAcc2} = fabric2_fdb:list_dbs_info(
Tx,
FoldFun,
InitAcc,
Options
),
- UserAcc3 = drain_all_info_futures(FinalFutureQ, UserFun, UserAcc2),
- {ok, maybe_stop(UserFun(complete, UserAcc3))}
- catch throw:{stop, FinalUserAcc} ->
- {ok, FinalUserAcc}
- end
- end).
+ drain_all_info_futures(FinalFutureQ, UserFun, UserAcc2)
+ end),
+ {ok, maybe_stop(UserFun(complete, UserAcc3))}
+ catch throw:{stop, FinalUserAcc} ->
+ {ok, FinalUserAcc}
+ end.
list_deleted_dbs_info() ->
@@ -390,26 +385,22 @@ list_deleted_dbs_info(UserFun, UserAcc0, Options0) ->
NewFutureQ = queue:in({DbName, TimeStamp, InfoFuture}, FutureQ),
drain_deleted_info_futures(NewFutureQ, Count + 1, UserFun, Acc)
end,
- fabric2_fdb:transactional(fun(Tx) ->
- try
- UserAcc1 = maybe_stop(UserFun({meta, []}, UserAcc0)),
- InitAcc = {queue:new(), 0, UserAcc1},
+ try
+ UserAcc1 = maybe_stop(UserFun({meta, []}, UserAcc0)),
+ InitAcc = {queue:new(), 0, UserAcc1},
+ UserAcc3 = fabric2_fdb:transactional(fun(Tx) ->
{FinalFutureQ, _, UserAcc2} = fabric2_fdb:list_deleted_dbs_info(
Tx,
FoldFun,
InitAcc,
Options2
),
- UserAcc3 = drain_all_deleted_info_futures(
- FinalFutureQ,
- UserFun,
- UserAcc2
- ),
- {ok, maybe_stop(UserFun(complete, UserAcc3))}
- catch throw:{stop, FinalUserAcc} ->
- {ok, FinalUserAcc}
- end
- end).
+ drain_all_deleted_info_futures(FinalFutureQ, UserFun, UserAcc2)
+ end),
+ {ok, maybe_stop(UserFun(complete, UserAcc3))}
+ catch throw:{stop, FinalUserAcc} ->
+ {ok, FinalUserAcc}
+ end.
is_admin(Db, {SecProps}) when is_list(SecProps) ->
diff --git a/src/fabric/test/fabric2_db_crud_tests.erl b/src/fabric/test/fabric2_db_crud_tests.erl
index 3d90c65b5..ab157d881 100644
--- a/src/fabric/test/fabric2_db_crud_tests.erl
+++ b/src/fabric/test/fabric2_db_crud_tests.erl
@@ -449,9 +449,12 @@ list_dbs_tx_too_old(_) ->
?assertMatch({ok, _}, fabric2_db:create(DbName1, [])),
?assertMatch({ok, _}, fabric2_db:create(DbName2, [])),
- UserFun = fun(Row, Acc) ->
- fabric2_test_util:tx_too_old_raise_in_user_fun(),
- {ok, [Row | Acc]}
+ UserFun = fun
+ ({row, _} = Row, Acc) ->
+ fabric2_test_util:tx_too_old_raise_in_user_fun(),
+ {ok, [Row | Acc]};
+ (Row, Acc) ->
+ {ok, [Row | Acc]}
end,
% Get get expected output without any transactions timing out
@@ -492,9 +495,12 @@ list_dbs_info_tx_too_old(_) ->
DbName
end, lists:seq(1, DbCount)),
- UserFun = fun(Row, Acc) ->
- fabric2_test_util:tx_too_old_raise_in_user_fun(),
- {ok, [Row | Acc]}
+ UserFun = fun
+ ({row, _} = Row, Acc) ->
+ fabric2_test_util:tx_too_old_raise_in_user_fun(),
+ {ok, [Row | Acc]};
+ (Row, Acc) ->
+ {ok, [Row | Acc]}
end,
% This is the expected return with no tx timeouts