From 65b41ed8d9f7ffcc5fa031db6f9318bac660401e Mon Sep 17 00:00:00 2001 From: Nick Vatamaniuc Date: Tue, 15 Sep 2020 14:56:18 -0400 Subject: [fixup|_job] improve comments --- src/couch_replicator/src/couch_replicator_job.erl | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/couch_replicator/src/couch_replicator_job.erl b/src/couch_replicator/src/couch_replicator_job.erl index ae8aa9da3..ed3d00d7b 100644 --- a/src/couch_replicator/src/couch_replicator_job.erl +++ b/src/couch_replicator/src/couch_replicator_job.erl @@ -297,10 +297,11 @@ handle_info({'EXIT', Pid, Reason}, #rep_state{changes_queue=Pid} = State) -> handle_info({'EXIT', Pid, normal}, #rep_state{workers = Workers} = State) -> case Workers -- [Pid] of Workers -> - couch_log:error("unknown pid bit the dust ~p ~n", [Pid]), + %% Processes might be linked by replicator's auth plugins so + %% we tolerate them exiting `normal` here and don't crash + LogMsg = "~p: unknown pid exited `normal` ~p", + couch_log:error(LogMsg, [?MODULE, Pid]), {noreply, State#rep_state{workers = Workers}}; - %% not clear why a stop was here before - %%{stop, {unknown_process_died, Pid, normal}, State}; [] -> catch unlink(State#rep_state.changes_manager), catch exit(State#rep_state.changes_manager, kill), @@ -564,11 +565,17 @@ check_ownership(#{jtx := true} = JTx, Job, JobData) -> case couch_replicator_jobs:get_job_data(JTx, OtherJobId) of {ok, #{?STATE := S, ?DB_NAME := null}} when S == ?ST_RUNNING; S == ?ST_PENDING -> + % Conflicting job is a transient job, not associated with a + % _replicator doc, so we let this job retry. This is also + % partly done for compatibility with previous replicator + % behavior. Error = <<"Duplicate job running: ", OtherJobId/binary>>, reschedule_on_error(JTx, Job, JobData, Error), not_owner; {ok, #{?STATE := S, ?DB_NAME := <<_/binary>>}} when S == ?ST_RUNNING; S == ?ST_PENDING -> + % Conflicting job is a permanent replication job, so this + % job is marked as failed. 
Error = <<"Duplicate job running: ", OtherJobId/binary>>, fail_job(JTx, Job, JobData, Error), not_owner; -- cgit v1.2.1