summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Nick Vatamaniuc <vatamane@apache.org> 2020-09-15 14:56:18 -0400
committer: Nick Vatamaniuc <vatamane@apache.org> 2020-09-15 14:56:18 -0400
commit: 65b41ed8d9f7ffcc5fa031db6f9318bac660401e (patch)
tree: ed7fa7969ef07298f37f7a033b806e4b33828961
parent: be80e84f5ac27a787a7af14af1c1661eaaa34568 (diff)
download: couchdb-prototype/fdb-replicator.tar.gz
[fixup|_job] improve comments (branch: prototype/fdb-replicator)
-rw-r--r-- src/couch_replicator/src/couch_replicator_job.erl 13
1 files changed, 10 insertions, 3 deletions
diff --git a/src/couch_replicator/src/couch_replicator_job.erl b/src/couch_replicator/src/couch_replicator_job.erl
index ae8aa9da3..ed3d00d7b 100644
--- a/src/couch_replicator/src/couch_replicator_job.erl
+++ b/src/couch_replicator/src/couch_replicator_job.erl
@@ -297,10 +297,11 @@ handle_info({'EXIT', Pid, Reason}, #rep_state{changes_queue=Pid} = State) ->
handle_info({'EXIT', Pid, normal}, #rep_state{workers = Workers} = State) ->
case Workers -- [Pid] of
Workers ->
- couch_log:error("unknown pid bit the dust ~p ~n", [Pid]),
+ %% Processes might be linked by replicator's auth plugins so
+ %% we tolerate them exiting `normal` here and don't crash
+ LogMsg = "~p: unknown pid exited `normal` ~p",
+ couch_log:error(LogMsg, [?MODULE, Pid]),
{noreply, State#rep_state{workers = Workers}};
- %% not clear why a stop was here before
- %%{stop, {unknown_process_died, Pid, normal}, State};
[] ->
catch unlink(State#rep_state.changes_manager),
catch exit(State#rep_state.changes_manager, kill),
@@ -564,11 +565,17 @@ check_ownership(#{jtx := true} = JTx, Job, JobData) ->
case couch_replicator_jobs:get_job_data(JTx, OtherJobId) of
{ok, #{?STATE := S, ?DB_NAME := null}} when
S == ?ST_RUNNING; S == ?ST_PENDING ->
+ % Conflicting job is a transient job, not associated with a
+ % _replicator doc, so we let this job retry. This is also
+ % partly done for compatibility with previous replicator
+ % behavior.
Error = <<"Duplicate job running: ", OtherJobId/binary>>,
reschedule_on_error(JTx, Job, JobData, Error),
not_owner;
{ok, #{?STATE := S, ?DB_NAME := <<_/binary>>}} when
S == ?ST_RUNNING; S == ?ST_PENDING ->
+ % Conflicting job is a permanent replication job, so this
+ % job is marked as failed.
Error = <<"Duplicate job running: ", OtherJobId/binary>>,
fail_job(JTx, Job, JobData, Error),
not_owner;