summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Adam Kocoloski <adam@cloudant.com>, 2013-05-30 11:49:29 -0400
committer: Adam Kocoloski <adam@cloudant.com>, 2013-05-30 11:51:12 -0400
commit: ef634e7f6f10a5c3710fea525a9172808c981967 (patch)
tree: 93c20ec924c09e4ef4cf676f65413ebcce1e052c
parent: 3a1d0b79805295d6099f9bc078e15676e064c55c (diff)
download: couchdb-ef634e7f6f10a5c3710fea525a9172808c981967.tar.gz
Identify and report on conflicted partition tables
BugzID: 19527
-rw-r--r-- src/custodian/src/custodian_server.erl | 11
-rw-r--r-- src/custodian/src/custodian_util.erl | 30
2 files changed, 32 insertions, 9 deletions
diff --git a/src/custodian/src/custodian_server.erl b/src/custodian/src/custodian_server.erl
index 3b947b782..4429802a2 100644
--- a/src/custodian/src/custodian_server.erl
+++ b/src/custodian/src/custodian_server.erl
@@ -106,10 +106,19 @@ handle_db_event(_DbName, _Event, _St) ->
{ok, nil}.
check_shards() ->
- {Unavailable, Impaired} = custodian:summary(),
+ {Unavailable, Impaired, Conflicted} = custodian:summary(),
+ send_conflicted_alert(Conflicted),
send_unavailable_alert(Unavailable),
send_impaired_alert(Impaired).
+send_conflicted_alert(0) ->
+ twig:log(notice, "No partition tables conflicted in this cluster", []),
+ os:cmd("send_snmptrap --trap CLOUDANT-DBCORE-MIB::cloudantDbcoreNoPartitionTablesConflictedEvent");
+send_conflicted_alert(Count) when is_integer(Count) ->
+ twig:log(crit, "~B conflicted partition tables in this cluster", [Count]),
+ os:cmd("send_snmptrap --trap CLOUDANT-DBCORE-MIB::cloudantDbcorePartitionTablesConflictedEvent -o cloudantDbcoreShardCount:INTEGER:"
+ ++ integer_to_list(Count)).
+
send_impaired_alert(0) ->
twig:log(notice, "No shards impaired in this cluster", []),
os:cmd("send_snmptrap --trap CLOUDANT-DBCORE-MIB::cloudantDbcoreAllShardsUnimpairedEvent");
diff --git a/src/custodian/src/custodian_util.erl b/src/custodian/src/custodian_util.erl
index 0b0112543..5d132d354 100644
--- a/src/custodian/src/custodian_util.erl
+++ b/src/custodian/src/custodian_util.erl
@@ -12,18 +12,22 @@
%% public functions.
summary() ->
- Fun = fun(_Id, _Range, unavailable, {Unavailable, Impaired}) ->
- {Unavailable + 1, Impaired};
- (_Id, _Range, {impaired, _N}, {Unavailable, Impaired}) ->
- {Unavailable, Impaired + 1}
+ Fun = fun(_Id, _Range, unavailable, {U, I, C}) ->
+ {U + 1, I, C};
+ (_Id, _Range, {impaired, _N}, {U, I, C}) ->
+ {U, I + 1, C};
+ (_Id, _Range, {conflicted, _N}, {U, I, C}) ->
+ {U, I, C + 1}
end,
- fold_dbs({0, 0}, Fun).
+ fold_dbs({0, 0, 0}, Fun).
report() ->
Fun = fun(Id, Range, unavailable, Acc) ->
[{Id, Range, unavailable}|Acc];
(Id, Range, {impaired, N}, Acc) ->
- [{Id, Range, {impaired, N}}|Acc]
+ [{Id, Range, {impaired, N}}|Acc];
+ (Id, _Range, {conflicted, N}, Acc) ->
+ [{Id, {conflicted, N}}|Acc]
end,
fold_dbs([], Fun).
@@ -50,11 +54,17 @@ fold_dbs(#full_doc_info{id = <<"_design/", _/binary>>}, _, Acc) ->
{ok, Acc};
fold_dbs(#full_doc_info{deleted=true}, _, Acc) ->
{ok, Acc};
-fold_dbs(#full_doc_info{id = Id} = FDI, _, {_Live, _N, _Fun, Db, _Acc0} = Acc) ->
+fold_dbs(#full_doc_info{id = Id} = FDI, _, {_Live, _N, Fun, Db, Acc0} = Acc) ->
+ InternalAcc = case count_conflicts(FDI) of
+ 0 ->
+ Acc0;
+ ConflictCount ->
+ Fun(Id, null, {conflicted, ConflictCount}, Acc0)
+ end,
Shards = load_shards(Db, FDI),
Rs = [R || #shard{range=R} <- lists:ukeysort(#shard.range, Shards)],
ActualN = [{R1, [N || #shard{node=N,range=R2} <- Shards, R1 == R2]} || R1 <- Rs],
- fold_dbs(Id, ActualN, Acc);
+ fold_dbs(Id, ActualN, setelement(5, Acc, InternalAcc));
fold_dbs(_Id, [], Acc) ->
{ok, Acc};
fold_dbs(Id, [{Range, Nodes}|Rest], {Live, N, Fun, Db, Acc0}) ->
@@ -91,6 +101,10 @@ maybe_redirect([Node|Rest], Acc) ->
maybe_redirect(Rest, [list_to_atom(Redirect)|Acc])
end.
+count_conflicts(#full_doc_info{rev_tree = T}) ->
+ Leafs = [1 || {#leaf{deleted=false}, _} <- couch_key_tree:get_all_leafs(T)],
+ length(Leafs) - 1.
+
ensure_custodian_ddoc_exists(Db) ->
case couch_db:open_doc(Db, ?CUSTODIAN_ID) of
{not_found, _Reason} ->