diff options
author | Adam Kocoloski <adam@cloudant.com> | 2013-05-30 11:49:29 -0400 |
---|---|---|
committer | Adam Kocoloski <adam@cloudant.com> | 2013-05-30 11:51:12 -0400 |
commit | ef634e7f6f10a5c3710fea525a9172808c981967 (patch) | |
tree | 93c20ec924c09e4ef4cf676f65413ebcce1e052c | |
parent | 3a1d0b79805295d6099f9bc078e15676e064c55c (diff) | |
download | couchdb-ef634e7f6f10a5c3710fea525a9172808c981967.tar.gz |
Identify and report on conflicted partition tables
BugzID: 19527
-rw-r--r-- | src/custodian/src/custodian_server.erl | 11 | ||||
-rw-r--r-- | src/custodian/src/custodian_util.erl | 30 |
2 files changed, 32 insertions, 9 deletions
diff --git a/src/custodian/src/custodian_server.erl b/src/custodian/src/custodian_server.erl
index 3b947b782..4429802a2 100644
--- a/src/custodian/src/custodian_server.erl
+++ b/src/custodian/src/custodian_server.erl
@@ -106,10 +106,19 @@ handle_db_event(_DbName, _Event, _St) ->
     {ok, nil}.
 
 check_shards() ->
-    {Unavailable, Impaired} = custodian:summary(),
+    {Unavailable, Impaired, Conflicted} = custodian:summary(),
+    send_conflicted_alert(Conflicted),
     send_unavailable_alert(Unavailable),
     send_impaired_alert(Impaired).
 
+send_conflicted_alert(0) ->
+    twig:log(notice, "No partition tables conflicted in this cluster", []),
+    os:cmd("send_snmptrap --trap CLOUDANT-DBCORE-MIB::cloudantDbcoreNoPartitionTablesConflictedEvent");
+send_conflicted_alert(Count) when is_integer(Count) ->
+    twig:log(crit, "~B conflicted partition tables in this cluster", [Count]),
+    os:cmd("send_snmptrap --trap CLOUDANT-DBCORE-MIB::cloudantDbcorePartitionTablesConflictedEvent -o cloudantDbcoreShardCount:INTEGER:"
+        ++ integer_to_list(Count)).
+
 send_impaired_alert(0) ->
     twig:log(notice, "No shards impaired in this cluster", []),
     os:cmd("send_snmptrap --trap CLOUDANT-DBCORE-MIB::cloudantDbcoreAllShardsUnimpairedEvent");
diff --git a/src/custodian/src/custodian_util.erl b/src/custodian/src/custodian_util.erl
index 0b0112543..5d132d354 100644
--- a/src/custodian/src/custodian_util.erl
+++ b/src/custodian/src/custodian_util.erl
@@ -12,18 +12,22 @@
 %% public functions.
 
 summary() ->
-    Fun = fun(_Id, _Range, unavailable, {Unavailable, Impaired}) ->
-        {Unavailable + 1, Impaired};
-    (_Id, _Range, {impaired, _N}, {Unavailable, Impaired}) ->
-        {Unavailable, Impaired + 1}
+    Fun = fun(_Id, _Range, unavailable, {U, I, C}) ->
+        {U + 1, I, C};
+    (_Id, _Range, {impaired, _N}, {U, I, C}) ->
+        {U, I + 1, C};
+    (_Id, _Range, {conflicted, _N}, {U, I, C}) ->
+        {U, I, C + 1}
     end,
-    fold_dbs({0, 0}, Fun).
+    fold_dbs({0, 0, 0}, Fun).
 
 report() ->
     Fun = fun(Id, Range, unavailable, Acc) ->
         [{Id, Range, unavailable}|Acc];
     (Id, Range, {impaired, N}, Acc) ->
-        [{Id, Range, {impaired, N}}|Acc]
+        [{Id, Range, {impaired, N}}|Acc];
+    (Id, _Range, {conflicted, N}, Acc) ->
+        [{Id, {conflicted, N}}|Acc]
     end,
     fold_dbs([], Fun).
 
@@ -50,11 +54,17 @@ fold_dbs(#full_doc_info{id = <<"_design/", _/binary>>}, _, Acc) ->
     {ok, Acc};
 fold_dbs(#full_doc_info{deleted=true}, _, Acc) ->
     {ok, Acc};
-fold_dbs(#full_doc_info{id = Id} = FDI, _, {_Live, _N, _Fun, Db, _Acc0} = Acc) ->
+fold_dbs(#full_doc_info{id = Id} = FDI, _, {_Live, _N, Fun, Db, Acc0} = Acc) ->
+    InternalAcc = case count_conflicts(FDI) of
+        0 ->
+            Acc0;
+        ConflictCount ->
+            Fun(Id, null, {conflicted, ConflictCount}, Acc0)
+    end,
     Shards = load_shards(Db, FDI),
     Rs = [R || #shard{range=R} <- lists:ukeysort(#shard.range, Shards)],
     ActualN = [{R1, [N || #shard{node=N,range=R2} <- Shards, R1 == R2]} || R1 <- Rs],
-    fold_dbs(Id, ActualN, Acc);
+    fold_dbs(Id, ActualN, setelement(5, Acc, InternalAcc));
 fold_dbs(_Id, [], Acc) ->
     {ok, Acc};
 fold_dbs(Id, [{Range, Nodes}|Rest], {Live, N, Fun, Db, Acc0}) ->
@@ -91,6 +101,10 @@ maybe_redirect([Node|Rest], Acc) ->
         maybe_redirect(Rest, [list_to_atom(Redirect)|Acc])
     end.
 
+count_conflicts(#full_doc_info{rev_tree = T}) ->
+    Leafs = [1 || {#leaf{deleted=false}, _} <- couch_key_tree:get_all_leafs(T)],
+    length(Leafs) - 1.
+
 ensure_custodian_ddoc_exists(Db) ->
     case couch_db:open_doc(Db, ?CUSTODIAN_ID) of
     {not_found, _Reason} ->