summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Vatamaniuc <vatamane@gmail.com>2021-04-06 10:04:17 -0400
committerNick Vatamaniuc <nickva@users.noreply.github.com>2021-04-06 11:44:17 -0400
commit7a6ea6545338f942ecf9fb590d5372b73867e0b9 (patch)
treefaced6e863744c2c09a214d426351db7fcf2fac3
parent8843083046ad28b4c987ac4da580103eb43fa15c (diff)
downloadcouchdb-7a6ea6545338f942ecf9fb590d5372b73867e0b9.tar.gz
Retryable error fixes in couch_jobs_type_monitor
This continues the improvements to retryable error handling started in https://github.com/apache/couchdb/pull/3460. Here we add the same logic we already have for the `erlfdb:wait/2` call in https://github.com/apache/couchdb/blob/main/src/couch_jobs/src/couch_jobs_type_monitor.erl#L55-L57 to the `get_vs_and_watch/1` section. couch_jobs_type_monitor is meant to be linked to a parent process and to run in a continuous loop for as long as that parent is alive. If FDB becomes unavailable, it is the main process we are linked to, or some other main component (possibly the whole application), that should crash and fail, as opposed to the type monitor itself. Still, to avoid running in a tight loop, we sleep for the holdoff interval before recursing. Typical holdoff values are around 50-100 msec.
-rw-r--r--src/couch_jobs/src/couch_jobs_type_monitor.erl21
1 files changed, 17 insertions, 4 deletions
diff --git a/src/couch_jobs/src/couch_jobs_type_monitor.erl b/src/couch_jobs/src/couch_jobs_type_monitor.erl
index a62eb6217..b58f34ecf 100644
--- a/src/couch_jobs/src/couch_jobs_type_monitor.erl
+++ b/src/couch_jobs/src/couch_jobs_type_monitor.erl
@@ -81,7 +81,20 @@ notify(#st{} = St) ->
St#st{timestamp = Now}.
-get_vs_and_watch(#st{jtx = JTx, type = Type}) ->
- couch_jobs_fdb:tx(JTx, fun(JTx1) ->
- couch_jobs_fdb:get_activity_vs_and_watch(JTx1, Type)
- end).
+get_vs_and_watch(#st{} = St) ->
+ #st{jtx = JTx, type = Type, holdoff = HoldOff} = St,
+ try
+ couch_jobs_fdb:tx(JTx, fun(JTx1) ->
+ couch_jobs_fdb:get_activity_vs_and_watch(JTx1, Type)
+ end)
+ catch
+ error:{erlfdb_error, ?ERLFDB_TRANSACTION_TIMED_OUT} ->
+ timer:sleep(HoldOff),
+ get_vs_and_watch(St);
+ error:{erlfdb_error, Code} when ?ERLFDB_IS_RETRYABLE(Code) ->
+ timer:sleep(HoldOff),
+ get_vs_and_watch(St);
+ error:{timeout, _} ->
+ timer:sleep(HoldOff),
+ get_vs_and_watch(St)
+ end.