summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Paul J. Davis <paul.joseph.davis@gmail.com> 2017-02-24 12:55:37 -0600
committer: Paul J. Davis <paul.joseph.davis@gmail.com> 2017-04-21 10:41:30 -0500
commit: e4c3705def6021a6b801c0bc0ceaac4abbc7c0d8 (patch)
tree: 18f9348bb8d2ecd2c7b449db6997e56f5a7032dc
parent: 778738b1b61dbb559a0ecc2acdd8720a02cc7ec8 (diff)
download: couchdb-e4c3705def6021a6b801c0bc0ceaac4abbc7c0d8.tar.gz
Fix stale shards cache
There's a race condition in mem3_shards that can leave shards in the cache for a database that has been deleted. The result is a confused cluster that thinks the database exists until you attempt to open it. The fix is to ignore any cache insert request that comes from an older version of the _dbs db than the mem3_shards cache knows about. Big thanks to @jdoane for the identification and original patch. COUCHDB-3376
-rw-r--r-- src/mem3/src/mem3_shards.erl 59
1 files changed, 48 insertions, 11 deletions
diff --git a/src/mem3/src/mem3_shards.erl b/src/mem3/src/mem3_shards.erl
index c7f33c61f..ca5deaf45 100644
--- a/src/mem3/src/mem3_shards.erl
+++ b/src/mem3/src/mem3_shards.erl
@@ -27,7 +27,8 @@
-record(st, {
max_size = 25000,
cur_size = 0,
- changes_pid
+ changes_pid,
+ update_seq
}).
-include_lib("mem3/include/mem3.hrl").
@@ -191,11 +192,12 @@ init([]) ->
ets:new(?ATIMES, [ordered_set, protected, named_table]),
ok = config:listen_for_changes(?MODULE, nil),
SizeList = config:get("mem3", "shard_cache_size", "25000"),
- {Pid, _} = spawn_monitor(fun() -> listen_for_changes(get_update_seq()) end),
+ UpdateSeq = get_update_seq(),
{ok, #st{
max_size = list_to_integer(SizeList),
cur_size = 0,
- changes_pid = Pid
+ changes_pid = start_changes_listener(UpdateSeq),
+ update_seq = UpdateSeq
}}.
handle_call({set_max_size, Size}, _From, St) ->
@@ -210,12 +212,28 @@ handle_cast({cache_hit, DbName}, St) ->
couch_stats:increment_counter([mem3, shard_cache, hit]),
cache_hit(DbName),
{noreply, St};
-handle_cast({cache_insert, DbName, Shards}, St) ->
+handle_cast({cache_insert, DbName, Shards, UpdateSeq}, St) ->
couch_stats:increment_counter([mem3, shard_cache, miss]),
- {noreply, cache_free(cache_insert(St, DbName, Shards))};
+ % This comparison deliberately uses the `<` operator
+ % and not `=<`. To see why, consider a _dbs db that is
+ % not changing: every insert request then carries a seq
+ % equal to the one we hold, so with `=<` it would be
+ % impossible to insert anything into the cache.
+ NewSt = case UpdateSeq < St#st.update_seq of
+ true -> St;
+ false -> cache_free(cache_insert(St, DbName, Shards))
+ end,
+ {noreply, NewSt};
handle_cast({cache_remove, DbName}, St) ->
couch_stats:increment_counter([mem3, shard_cache, eviction]),
{noreply, cache_remove(St, DbName)};
+handle_cast({cache_insert_change, DbName, Shards, UpdateSeq}, St) ->
+ Msg = {cache_insert, DbName, Shards, UpdateSeq},
+ {noreply, NewSt} = handle_cast(Msg, St),
+ {noreply, NewSt#st{update_seq = UpdateSeq}};
+handle_cast({cache_remove_change, DbName, UpdateSeq}, St) ->
+ {noreply, NewSt} = handle_cast({cache_remove, DbName}, St),
+ {noreply, NewSt#st{update_seq = UpdateSeq}};
handle_cast(_Msg, St) ->
{noreply, St}.
@@ -232,8 +250,9 @@ handle_info({'DOWN', _, _, Pid, Reason}, #st{changes_pid=Pid}=St) ->
erlang:send_after(5000, self(), {start_listener, Seq}),
{noreply, NewSt#st{changes_pid=undefined}};
handle_info({start_listener, Seq}, St) ->
- {NewPid, _} = spawn_monitor(fun() -> listen_for_changes(Seq) end),
- {noreply, St#st{changes_pid=NewPid}};
+ {noreply, St#st{
+ changes_pid = start_changes_listener(Seq)
+ }};
handle_info(restart_config_listener, State) ->
ok = config:listen_for_changes(?MODULE, nil),
{noreply, State};
@@ -249,6 +268,21 @@ code_change(_OldVsn, #st{}=St, _Extra) ->
%% internal functions
+start_changes_listener(SinceSeq) ->
+ Self = self(),
+ {Pid, _} = erlang:spawn_monitor(fun() ->
+ erlang:spawn_link(fun() ->
+ Ref = erlang:monitor(process, Self),
+ receive
+ {'DOWN', Ref, _, _, _} ->
+ ok
+ end,
+ exit(shutdown)
+ end),
+ listen_for_changes(SinceSeq)
+ end),
+ Pid.
+
fold_fun(#full_doc_info{}=FDI, _, Acc) ->
DI = couch_doc:to_doc_info(FDI),
fold_fun(DI, nil, Acc);
@@ -287,10 +321,11 @@ changes_callback({stop, EndSeq}, _) ->
exit({seq, EndSeq});
changes_callback({change, {Change}, _}, _) ->
DbName = couch_util:get_value(<<"id">>, Change),
+ Seq = couch_util:get_value(<<"seq">>, Change),
case DbName of <<"_design/", _/binary>> -> ok; _Else ->
case mem3_util:is_deleted(Change) of
true ->
- gen_server:cast(?MODULE, {cache_remove, DbName});
+ gen_server:cast(?MODULE, {cache_remove_change, DbName, Seq});
false ->
case couch_util:get_value(doc, Change) of
{error, Reason} ->
@@ -298,13 +333,14 @@ changes_callback({change, {Change}, _}, _) ->
[DbName, Reason]);
{Doc} ->
Shards = mem3_util:build_ordered_shards(DbName, Doc),
- gen_server:cast(?MODULE, {cache_insert, DbName, Shards}),
+ Msg = {cache_insert_change, DbName, Shards, Seq},
+ gen_server:cast(?MODULE, Msg),
[create_if_missing(mem3:name(S)) || S
<- Shards, mem3:node(S) =:= node()]
end
end
end,
- {ok, couch_util:get_value(<<"seq">>, Change)};
+ {ok, Seq};
changes_callback(timeout, _) ->
ok.
@@ -320,8 +356,9 @@ load_shards_from_disk(DbName) when is_binary(DbName) ->
load_shards_from_db(#db{} = ShardDb, DbName) ->
case couch_db:open_doc(ShardDb, DbName, [ejson_body]) of
{ok, #doc{body = {Props}}} ->
+ Seq = couch_db:get_update_seq(ShardDb),
Shards = mem3_util:build_ordered_shards(DbName, Props),
- gen_server:cast(?MODULE, {cache_insert, DbName, Shards}),
+ gen_server:cast(?MODULE, {cache_insert, DbName, Shards, Seq}),
Shards;
{not_found, _} ->
erlang:error(database_does_not_exist, ?b2l(DbName))