summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Vatamaniuc <vatamane@apache.org>2019-04-30 19:28:12 -0400
committerNick Vatamaniuc <vatamane@apache.org>2019-04-30 19:51:28 -0400
commit7cd785a1f0829e2970146891313ceb69f248cba4 (patch)
tree8ad546a63611066f0d6c2a1ba1e653038929453f
parent85617a77b1b5444568d4ed9549a39cf406729c88 (diff)
downloadcouchdb-handle-database-recreation-case-in-mem3.tar.gz
Handle database re-creation edge case in internal replicatorhandle-database-recreation-case-in-mem3
Previously, if a database was deleted and re-created while the internal replication request was pending, the job would have been retried continuously. mem3:targets_map/2 function would return an empty targets map and mem3_rep:go would raise a function clause exception if the database was present but it was an older "incarnation" of it (with shards living on different target nodes). Because it was an exception and not an {error, ...} result, the process would exit with an error. Subsequently, mem3_sync would try to handle the process exit and check if the database was deleted, but it also didn't account for the case when the database was re-created, so it would resubmit the job into the queue again. To fix it, we introduce a function to check if the database shard is part of the current database shard map. Then perform the check both before building the targets map and also on job retries.
-rw-r--r--src/mem3/src/mem3.erl20
-rw-r--r--src/mem3/src/mem3_rep.erl8
-rw-r--r--src/mem3/src/mem3_sync.erl11
3 files changed, 33 insertions, 6 deletions
diff --git a/src/mem3/src/mem3.erl b/src/mem3/src/mem3.erl
index dc666fdae..6f3a10df8 100644
--- a/src/mem3/src/mem3.erl
+++ b/src/mem3/src/mem3.erl
@@ -22,6 +22,7 @@
-export([belongs/2, owner/3]).
-export([get_placement/1]).
-export([ping/1, ping/2]).
+-export([db_is_current/1]).
%% For mem3 use only.
-export([name/1, node/1, range/1, engine/1]).
@@ -367,6 +368,25 @@ ping(Node, Timeout) when is_atom(Node) ->
pang
end.
+
+db_is_current(#shard{name = Name}) ->
+ db_is_current(Name);
+
+db_is_current(<<"shards/", _/binary>> = Name) ->
+ try
+ Shards = mem3:shards(mem3:dbname(Name)),
+ lists:keyfind(Name, #shard.name, Shards) =/= false
+ catch
+ error:database_does_not_exist ->
+ false
+ end;
+
+db_is_current(Name) when is_binary(Name) ->
+ % This accounts for local (non-sharded) dbs, and is mostly
+ % for unit tests that either test or use mem3_rep logic
+ couch_server:exists(Name).
+
+
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
diff --git a/src/mem3/src/mem3_rep.erl b/src/mem3/src/mem3_rep.erl
index d5b42d315..d2edd6c4d 100644
--- a/src/mem3/src/mem3_rep.erl
+++ b/src/mem3/src/mem3_rep.erl
@@ -64,7 +64,13 @@ go(DbName, Node, Opts) when is_binary(DbName), is_atom(Node) ->
go(#shard{name=DbName, node=node()}, #shard{name=DbName, node=Node}, Opts);
go(#shard{} = Source, #shard{} = Target, Opts) ->
- go(Source, targets_map(Source, Target), Opts);
+ case mem3:db_is_current(Source) of
+ true ->
+ go(Source, targets_map(Source, Target), Opts);
+ false ->
+ % Database could have been recreated
+ {error, missing_source}
+ end;
go(#shard{} = Source, #{} = Targets0, Opts) when map_size(Targets0) > 0 ->
Targets = maps:map(fun(_, T) -> #tgt{shard = T} end, Targets0),
diff --git a/src/mem3/src/mem3_sync.erl b/src/mem3/src/mem3_sync.erl
index 693fc4f31..8170f3c1a 100644
--- a/src/mem3/src/mem3_sync.erl
+++ b/src/mem3/src/mem3_sync.erl
@@ -140,11 +140,12 @@ handle_info({'EXIT', Active, Reason}, State) ->
case Reason of {pending_changes, Count} ->
maybe_resubmit(State, Job#job{pid = nil, count = Count});
_ ->
- try mem3:shards(mem3:dbname(Job#job.name)) of _ ->
- timer:apply_after(5000, ?MODULE, push, [Job#job{pid=nil}])
- catch error:database_does_not_exist ->
- % no need to retry
- ok
+ case mem3:db_is_current(Job#job.name) of
+ true ->
+ timer:apply_after(5000, ?MODULE, push, [Job#job{pid=nil}]);
+ false ->
+ % no need to retry (db deleted or recreated)
+ ok
end,
State
end;