diff options
author | Paul J. Davis <paul.joseph.davis@gmail.com> | 2018-10-23 14:18:35 -0500 |
---|---|---|
committer | Paul J. Davis <paul.joseph.davis@gmail.com> | 2019-01-18 13:03:28 -0600 |
commit | 6edb9a54b7111fdb0a1713d1af2268271e27d02f (patch) | |
tree | 328a60e22133b03d66de953e2d7aa73ee3069fd3 | |
parent | 25ac408362e9ffd565f1edf360f3bd7e6a92a7eb (diff) | |
download | couchdb-6edb9a54b7111fdb0a1713d1af2268271e27d02f.tar.gz |
Implement `couch_db:get_partition_info/2`
This feature allows us to fetch statistics for a given partition key,
which lets users find bloated partitions and so forth.
Co-authored-by: Garren Smith <garren.smith@gmail.com>
Co-authored-by: Robert Newson <rnewson@apache.org>
-rw-r--r-- | src/couch/src/couch_bt_engine.erl | 43 | ||||
-rw-r--r-- | src/couch/src/couch_db.erl | 8 | ||||
-rw-r--r-- | src/couch/src/couch_db_engine.erl | 24 | ||||
-rw-r--r-- | src/couch/src/couch_partition.erl | 11 | ||||
-rw-r--r-- | src/fabric/src/fabric.erl | 15 | ||||
-rw-r--r-- | src/fabric/src/fabric_db_partition_info.erl | 99 | ||||
-rw-r--r-- | src/fabric/src/fabric_rpc.erl | 5 |
7 files changed, 203 insertions, 2 deletions
diff --git a/src/couch/src/couch_bt_engine.erl b/src/couch/src/couch_bt_engine.erl index 946b74d0c..7b33c4203 100644 --- a/src/couch/src/couch_bt_engine.erl +++ b/src/couch/src/couch_bt_engine.erl @@ -42,6 +42,7 @@ get_security/1, get_props/1, get_size_info/1, + get_partition_info/2, get_update_seq/1, get_uuid/1, @@ -277,6 +278,48 @@ get_size_info(#st{} = St) -> ]. +partition_size_cb(traverse, Key, {DC, DDC, Sizes}, {Partition, DCAcc, DDCAcc, SizesAcc}) -> + case couch_partition:is_member(Key, Partition) of + true -> + {skip, {Partition, DC + DCAcc, DDC + DDCAcc, reduce_sizes(Sizes, SizesAcc)}}; + false -> + {ok, {Partition, DCAcc, DDCAcc, SizesAcc}} + end; + +partition_size_cb(visit, FDI, _PrevReds, {Partition, DCAcc, DDCAcc, Acc}) -> + InPartition = couch_partition:is_member(FDI#full_doc_info.id, Partition), + Deleted = FDI#full_doc_info.deleted, + case {InPartition, Deleted} of + {true, true} -> + {ok, {Partition, DCAcc, DDCAcc + 1, + reduce_sizes(FDI#full_doc_info.sizes, Acc)}}; + {true, false} -> + {ok, {Partition, DCAcc + 1, DDCAcc, + reduce_sizes(FDI#full_doc_info.sizes, Acc)}}; + {false, _} -> + {ok, {Partition, DCAcc, DDCAcc, Acc}} + end. + + +get_partition_info(#st{} = St, Partition) -> + StartKey = couch_partition:start_key(Partition), + EndKey = couch_partition:end_key(Partition), + Fun = fun partition_size_cb/4, + InitAcc = {Partition, 0, 0, #size_info{}}, + Options = [{start_key, StartKey}, {end_key, EndKey}], + {ok, _, OutAcc} = couch_btree:fold(St#st.id_tree, Fun, InitAcc, Options), + {Partition, DocCount, DocDelCount, SizeInfo} = OutAcc, + [ + {partition, Partition}, + {doc_count, DocCount}, + {doc_del_count, DocDelCount}, + {sizes, [ + {active, SizeInfo#size_info.active}, + {external, SizeInfo#size_info.external} + ]} + ]. 
+ + get_security(#st{header = Header} = St) -> case couch_bt_engine_header:get(Header, security_ptr) of undefined -> diff --git a/src/couch/src/couch_db.erl b/src/couch/src/couch_db.erl index 2c6f41bf7..74f4a099d 100644 --- a/src/couch/src/couch_db.erl +++ b/src/couch/src/couch_db.erl @@ -38,6 +38,7 @@ get_compacted_seq/1, get_compactor_pid/1, get_db_info/1, + get_partition_info/2, get_del_doc_count/1, get_doc_count/1, get_epochs/1, @@ -633,6 +634,13 @@ get_db_info(Db) -> ], {ok, InfoList}. +get_partition_info(#db{} = Db, Partition) when is_binary(Partition) -> + Info = couch_db_engine:get_partition_info(Db, Partition), + {ok, Info}; +get_partition_info(_Db, _Partition) -> + throw({bad_request, <<"`partition` is not valid">>}). + + get_design_doc(#db{name = <<"shards/", _/binary>> = ShardDbName}, DDocId0) -> DDocId = couch_util:normalize_ddoc_id(DDocId0), DbName = mem3:dbname(ShardDbName), diff --git a/src/couch/src/couch_db_engine.erl b/src/couch/src/couch_db_engine.erl index 806d352cb..91d35b0c7 100644 --- a/src/couch/src/couch_db_engine.erl +++ b/src/couch/src/couch_db_engine.erl @@ -44,6 +44,12 @@ -type purge_info() :: {purge_seq(), uuid(), docid(), revs()}. -type epochs() :: [{Node::atom(), UpdateSeq::non_neg_integer()}]. -type size_info() :: [{Name::atom(), Size::non_neg_integer()}]. +-type partition_info() :: [ + {partition, Partition::binary()} | + {doc_count, DocCount::non_neg_integer()} | + {doc_del_count, DocDelCount::non_neg_integer()} | + {sizes, size_info()} +]. -type write_stream_options() :: [ {buffer_size, Size::pos_integer()} | @@ -263,6 +269,18 @@ -callback get_size_info(DbHandle::db_handle()) -> SizeInfo::size_info(). +% This returns the information for the given partition. 
+% It should just be a list of {Name::atom(), Size::non_neg_integer()} +% It returns the partition name, doc count, deleted doc count and two sizes: +% +% active - Theoretical minimum number of bytes to store this partition on disk +% +% external - Number of bytes that would be required to represent the +% contents of this partition outside of the database +-callback get_partition_info(DbHandle::db_handle(), Partition::binary()) -> + partition_info(). + + % The current update sequence of the database. The update % sequence should be incrememnted for every revision added to % the database. @@ -685,6 +703,7 @@ get_security/1, get_props/1, get_size_info/1, + get_partition_info/2, get_update_seq/1, get_uuid/1, @@ -861,6 +880,11 @@ get_size_info(#db{} = Db) -> Engine:get_size_info(EngineState). +get_partition_info(#db{} = Db, Partition) -> + #db{engine = {Engine, EngineState}} = Db, + Engine:get_partition_info(EngineState, Partition). + + get_update_seq(#db{} = Db) -> #db{engine = {Engine, EngineState}} = Db, Engine:get_update_seq(EngineState). diff --git a/src/couch/src/couch_partition.erl b/src/couch/src/couch_partition.erl index 783921f0a..9ff77a0ff 100644 --- a/src/couch/src/couch_partition.erl +++ b/src/couch/src/couch_partition.erl @@ -18,6 +18,9 @@ from_docid/1, is_member/2, + start_key/1, + end_key/1, + validate_dbname/2, validate_docid/1, validate_partition/1, @@ -59,6 +62,14 @@ is_member(DocId, Partition) -> end. +start_key(Partition) -> + <<Partition/binary, ":">>. + + +end_key(Partition) -> + <<Partition/binary, ";">>. 
+ + validate_dbname(DbName, Options) when is_list(DbName) -> validate_dbname(?l2b(DbName), Options); validate_dbname(DbName, Options) when is_binary(DbName) -> diff --git a/src/fabric/src/fabric.erl b/src/fabric/src/fabric.erl index 70d37679a..7476ff7b2 100644 --- a/src/fabric/src/fabric.erl +++ b/src/fabric/src/fabric.erl @@ -23,7 +23,7 @@ get_revs_limit/1, get_security/1, get_security/2, get_all_security/1, get_all_security/2, get_purge_infos_limit/1, set_purge_infos_limit/3, - compact/1, compact/2]). + compact/1, compact/2, get_partition_info/2]). % Documents -export([open_doc/3, open_revs/4, get_doc_info/3, get_full_doc_info/3, @@ -86,6 +86,19 @@ all_dbs(Prefix) when is_list(Prefix) -> get_db_info(DbName) -> fabric_db_info:go(dbname(DbName)). +%% @doc returns the size of a given partition +-spec get_partition_info(dbname(), Partition::binary()) -> + {ok, [ + {db_name, binary()} | + {partition, binary()} | + {doc_count, non_neg_integer()} | + {doc_del_count, non_neg_integer()} | + {sizes, json_obj()} + ]}. +get_partition_info(DbName, Partition) -> + fabric_db_partition_info:go(dbname(DbName), Partition). + + %% @doc the number of docs in a database %% @equiv get_doc_count(DbName, <<"_all_docs">>) get_doc_count(DbName) -> diff --git a/src/fabric/src/fabric_db_partition_info.erl b/src/fabric/src/fabric_db_partition_info.erl new file mode 100644 index 000000000..97e669a52 --- /dev/null +++ b/src/fabric/src/fabric_db_partition_info.erl @@ -0,0 +1,99 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(fabric_db_partition_info). + +-export([go/2]). + +-include_lib("fabric/include/fabric.hrl"). +-include_lib("mem3/include/mem3.hrl"). + +go(DbName, Partition) -> + Shards = mem3:shards(DbName, <<Partition/binary, ":foo">>), + Workers = fabric_util:submit_jobs(Shards, get_partition_info, [Partition]), + RexiMon = fabric_util:create_monitors(Shards), + Fun = fun handle_message/3, + Acc0 = {fabric_dict:init(Workers, nil), []}, + try + case fabric_util:recv(Workers, #shard.ref, Fun, Acc0) of + {ok, Acc} -> {ok, Acc}; + {timeout, {WorkersDict, _}} -> + DefunctWorkers = fabric_util:remove_done_workers( + WorkersDict, + nil + ), + fabric_util:log_timeout( + DefunctWorkers, + "get_partition_info" + ), + {error, timeout}; + {error, Error} -> throw(Error) + end + after + rexi_monitor:stop(RexiMon) + end. + +handle_message({rexi_DOWN, _, {_,NodeRef},_}, _Shard, {Counters, Acc}) -> + case fabric_util:remove_down_workers(Counters, NodeRef) of + {ok, NewCounters} -> + {ok, {NewCounters, Acc}}; + error -> + {error, {nodedown, <<"progress not possible">>}} + end; + +handle_message({rexi_EXIT, Reason}, Shard, {Counters, Acc}) -> + NewCounters = fabric_dict:erase(Shard, Counters), + case fabric_view:is_progress_possible(NewCounters) of + true -> + {ok, {NewCounters, Acc}}; + false -> + {error, Reason} + end; + +handle_message({ok, Info}, #shard{dbname=Name} = Shard, {Counters, Acc}) -> + Acc2 = [Info | Acc], + Counters1 = fabric_dict:erase(Shard, Counters), + case fabric_dict:size(Counters1) =:= 0 of + true -> + [FirstInfo | RestInfos] = Acc2, + PartitionInfo = get_max_partition_size(FirstInfo, RestInfos), + {stop, [{db_name, Name} | format_partition(PartitionInfo)]}; + false -> + {ok, {Counters1, Acc2}} + end; + +handle_message(_, _, Acc) -> + {ok, Acc}. 
+ + +get_max_partition_size(Max, []) -> + Max; +get_max_partition_size(MaxInfo, [NextInfo | Rest]) -> + {sizes, MaxSize} = lists:keyfind(sizes, 1, MaxInfo), + {sizes, NextSize} = lists:keyfind(sizes, 1, NextInfo), + + {external, MaxExtSize} = lists:keyfind(external, 1, MaxSize), + {external, NextExtSize} = lists:keyfind(external, 1, NextSize), + case NextExtSize > MaxExtSize of + true -> + get_max_partition_size(NextInfo, Rest); + false -> + get_max_partition_size(MaxInfo, Rest) + end. + + +% for JS to work nicely we need to convert the size list +% to a jiffy object +format_partition(PartitionInfo) -> + {value, {sizes, Size}, PartitionInfo1} = lists:keytake(sizes, 1, PartitionInfo), + [{sizes, {Size}} | PartitionInfo1]. + diff --git a/src/fabric/src/fabric_rpc.erl b/src/fabric/src/fabric_rpc.erl index 2b00a3668..b80cc792e 100644 --- a/src/fabric/src/fabric_rpc.erl +++ b/src/fabric/src/fabric_rpc.erl @@ -19,7 +19,7 @@ -export([all_docs/3, changes/3, map_view/4, reduce_view/4, group_info/2]). -export([create_db/1, create_db/2, delete_db/1, reset_validation_funs/1, set_security/3, set_revs_limit/3, create_shard_db_doc/2, - delete_shard_db_doc/2]). + delete_shard_db_doc/2, get_partition_info/2]). -export([get_all_security/2, open_shard/2]). -export([compact/1, compact/2]). -export([get_purge_seq/2, purge_docs/3, set_purge_infos_limit/3]). @@ -195,6 +195,9 @@ get_db_info(DbName) -> get_db_info(DbName, DbOptions) -> with_db(DbName, DbOptions, {couch_db, get_db_info, []}). +get_partition_info(DbName, Partition) -> + with_db(DbName, [], {couch_db, get_partition_info, [Partition]}). + %% equiv get_doc_count(DbName, []) get_doc_count(DbName) -> get_doc_count(DbName, []). |