diff options
author | Nick Vatamaniuc <vatamane@apache.org> | 2020-09-15 14:56:18 -0400 |
---|---|---|
committer | Nick Vatamaniuc <vatamane@apache.org> | 2020-09-15 14:56:18 -0400 |
commit | 65b41ed8d9f7ffcc5fa031db6f9318bac660401e (patch) | |
tree | ed7fa7969ef07298f37f7a033b806e4b33828961 | |
parent | be80e84f5ac27a787a7af14af1c1661eaaa34568 (diff) | |
download | couchdb-prototype/fdb-replicator.tar.gz |
[fixup|_job] improve comments prototype/fdb-replicator
-rw-r--r-- | src/couch_replicator/src/couch_replicator_job.erl | 13 |
1 files changed, 10 insertions, 3 deletions
diff --git a/src/couch_replicator/src/couch_replicator_job.erl b/src/couch_replicator/src/couch_replicator_job.erl index ae8aa9da3..ed3d00d7b 100644 --- a/src/couch_replicator/src/couch_replicator_job.erl +++ b/src/couch_replicator/src/couch_replicator_job.erl @@ -297,10 +297,11 @@ handle_info({'EXIT', Pid, Reason}, #rep_state{changes_queue=Pid} = State) -> handle_info({'EXIT', Pid, normal}, #rep_state{workers = Workers} = State) -> case Workers -- [Pid] of Workers -> - couch_log:error("unknown pid bit the dust ~p ~n", [Pid]), + %% Processes might be linked by replicator's auth plugins so + %% we tolerate them exiting `normal` here and don't crash + LogMsg = "~p: unknown pid exited `normal` ~p", + couch_log:error(LogMsg, [?MODULE, Pid]), {noreply, State#rep_state{workers = Workers}}; - %% not clear why a stop was here before - %%{stop, {unknown_process_died, Pid, normal}, State}; [] -> catch unlink(State#rep_state.changes_manager), catch exit(State#rep_state.changes_manager, kill), @@ -564,11 +565,17 @@ check_ownership(#{jtx := true} = JTx, Job, JobData) -> case couch_replicator_jobs:get_job_data(JTx, OtherJobId) of {ok, #{?STATE := S, ?DB_NAME := null}} when S == ?ST_RUNNING; S == ?ST_PENDING -> + % Conflicting job is a transient job, not associated with a + % _replicator doc, so we let this job retry. This is also + % partly done for compatibility with pervious replicator + % behavior. Error = <<"Duplicate job running: ", OtherJobId/binary>>, reschedule_on_error(JTx, Job, JobData, Error), not_owner; {ok, #{?STATE := S, ?DB_NAME := <<_/binary>>}} when S == ?ST_RUNNING; S == ?ST_PENDING -> + % Conflicting job is a permanent replication job, so this + % job is marked as failed. Error = <<"Duplicate job running: ", OtherJobId/binary>>, fail_job(JTx, Job, JobData, Error), not_owner; |