summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Tony Sun <tony.sun@cloudant.com> 2017-06-12 09:26:49 -0700
committer: Tony Sun <tony.sun@cloudant.com> 2017-07-15 11:19:20 -0700
commit: 6d06dcf8b450259b024217c43ba58f3c67579963 (patch)
tree: 872dbaeb78146c8f8f596908bc14a7efad7b470d
parent: dce6e34686329e711e1a6c50aae00761ecb3262e (diff)
download: couchdb-3430-external-size-views.tar.gz
calculate data_size correctly (branch: 3430-external-size-views)
Previously, we calculated the ExternalSize for views by summing the sizes of all the nodes in the btree; moreover, that sum reflected the compressed size. Now the reduce function is modified to also return an ExternalSize computed from the uncompressed values in the KVList.
PR: https://github.com/apache/couchdb/pull/608
COUCHDB-3430
-rw-r--r-- src/couch/test/couchdb_file_compression_tests.erl 14
-rw-r--r-- src/couch_mrview/src/couch_mrview_util.erl 47
2 files changed, 49 insertions, 12 deletions
diff --git a/src/couch/test/couchdb_file_compression_tests.erl b/src/couch/test/couchdb_file_compression_tests.erl
index 41d055624..935638614 100644
--- a/src/couch/test/couchdb_file_compression_tests.erl
+++ b/src/couch/test/couchdb_file_compression_tests.erl
@@ -125,6 +125,7 @@ compare_compression_methods(DbName) ->
DbSizeNone = db_disk_size(DbName),
ViewSizeNone = view_disk_size(DbName),
ExternalSizeNone = db_external_size(DbName),
+ ViewExternalSizeNone = view_external_size(DbName),
config:set("couchdb", "file_compression", "snappy", false),
compact_db(DbName),
@@ -132,6 +133,7 @@ compare_compression_methods(DbName) ->
DbSizeSnappy = db_disk_size(DbName),
ViewSizeSnappy = view_disk_size(DbName),
ExternalSizeSnappy = db_external_size(DbName),
+ ViewExternalSizeSnappy = view_external_size(DbName),
?assert(DbSizeNone > DbSizeSnappy),
?assert(ViewSizeNone > ViewSizeSnappy),
@@ -151,12 +153,15 @@ compare_compression_methods(DbName) ->
DbSizeDeflate9 = db_disk_size(DbName),
ViewSizeDeflate9 = view_disk_size(DbName),
ExternalSizeDeflate9 = db_external_size(DbName),
+ ViewExternalSizeDeflate9 = view_external_size(DbName),
?assert(DbSizeDeflate1 > DbSizeDeflate9),
?assert(ViewSizeDeflate1 > ViewSizeDeflate9),
?assert(ExternalSizePreCompact =:= ExternalSizeNone),
?assert(ExternalSizeNone =:= ExternalSizeSnappy),
- ?assert(ExternalSizeNone =:= ExternalSizeDeflate9).
+ ?assert(ExternalSizeNone =:= ExternalSizeDeflate9),
+ ?assert(ViewExternalSizeNone =:= ViewExternalSizeSnappy),
+ ?assert(ViewExternalSizeNone =:= ViewExternalSizeDeflate9).
populate_db(_Db, NumDocs) when NumDocs =< 0 ->
@@ -214,6 +219,13 @@ view_disk_size(DbName) ->
ok = couch_db:close(Db),
active_size(Info).
+view_external_size(DbName) ->
+ {ok, Db} = couch_db:open_int(DbName, []),
+ {ok, DDoc} = couch_db:open_doc(Db, ?DDOC_ID, [ejson_body]),
+ {ok, Info} = couch_mrview:get_info(Db, DDoc),
+ ok = couch_db:close(Db),
+ external_size(Info).
+
active_size(Info) ->
couch_util:get_nested_json_value({Info}, [sizes, active]).
diff --git a/src/couch_mrview/src/couch_mrview_util.erl b/src/couch_mrview/src/couch_mrview_util.erl
index a8462a2d8..632522240 100644
--- a/src/couch_mrview/src/couch_mrview_util.erl
+++ b/src/couch_mrview/src/couch_mrview_util.erl
@@ -338,7 +338,10 @@ temp_view_to_ddoc({Props}) ->
get_row_count(#mrview{btree=Bt}) ->
- {ok, {Count, _Reds}} = couch_btree:full_reduce(Bt),
+ Count = case couch_btree:full_reduce(Bt) of
+ {ok, {Count0, _Reds, _}} -> Count0;
+ {ok, {Count0, _Reds}} -> Count0
+ end,
{ok, Count}.
@@ -786,27 +789,33 @@ changes_ekey_opts(_StartSeq, #mrargs{end_key=EKey,
end.
+reduced_external_size(Tree) ->
+ case couch_btree:full_reduce(Tree) of
+ {ok, {_, _, Size}} -> Size;
+ % return 0 for versions of the reduce function without Size
+ {ok, {_, _}} -> 0
+ end.
calculate_external_size(Views) ->
SumFun = fun(#mrview{btree=Bt, seq_btree=SBt, key_byseq_btree=KSBt}, Acc) ->
- Size0 = sum_btree_sizes(Acc, couch_btree:size(Bt)),
+ Size0 = sum_btree_sizes(Acc, reduced_external_size(Bt)),
Size1 = case SBt of
nil -> Size0;
- _ -> sum_btree_sizes(Size0, couch_btree:size(SBt))
+ _ -> sum_btree_sizes(Size0, reduced_external_size(SBt))
end,
case KSBt of
nil -> Size1;
- _ -> sum_btree_sizes(Size1, couch_btree:size(KSBt))
+ _ -> sum_btree_sizes(Size1, reduced_external_size(KSBt))
end
end,
{ok, lists:foldl(SumFun, 0, Views)}.
sum_btree_sizes(nil, _) ->
- null;
+ 0;
sum_btree_sizes(_, nil) ->
- null;
+ 0;
sum_btree_sizes(Size1, Size2) ->
Size1 + Size2.
@@ -1038,22 +1047,32 @@ get_user_reds(Reduction) ->
element(2, Reduction).
+get_external_size_reds(Reduction) when tuple_size(Reduction) == 2 ->
+ 0;
+
+get_external_size_reds(Reduction) when tuple_size(Reduction) == 3 ->
+ element(3, Reduction).
+
+
make_reduce_fun(Lang, ReduceFuns) ->
FunSrcs = [FunSrc || {_, FunSrc} <- ReduceFuns],
fun
(reduce, KVs0) ->
KVs = detuple_kvs(expand_dups(KVs0, []), []),
{ok, Result} = couch_query_servers:reduce(Lang, FunSrcs, KVs),
- {length(KVs), Result};
+ ExternalSize = kv_external_size(KVs, Result),
+ {length(KVs), Result, ExternalSize};
(rereduce, Reds) ->
- ExtractFun = fun(Red, {CountsAcc0, URedsAcc0}) ->
+ ExtractFun = fun(Red, {CountsAcc0, URedsAcc0, ExtAcc0}) ->
CountsAcc = CountsAcc0 + get_count(Red),
URedsAcc = lists:append(URedsAcc0, [get_user_reds(Red)]),
- {CountsAcc, URedsAcc}
+ ExtAcc = ExtAcc0 + get_external_size_reds(Red),
+ {CountsAcc, URedsAcc, ExtAcc}
end,
- {Counts, UReds} = lists:foldl(ExtractFun, {0, []}, Reds),
+ {Counts, UReds, ExternalSize} = lists:foldl(ExtractFun,
+ {0, [], 0}, Reds),
{ok, Result} = couch_query_servers:rereduce(Lang, FunSrcs, UReds),
- {Counts, Result}
+ {Counts, Result, ExternalSize}
end.
@@ -1130,3 +1149,9 @@ get_view_queries({Props}) ->
_ ->
throw({bad_request, "`queries` member must be a array."})
end.
+
+
+kv_external_size(KVList, Reduction) ->
+ lists:foldl(fun([[Key, _], Value], Acc) ->
+ ?term_size(Key) + ?term_size(Value) + Acc
+ end, ?term_size(Reduction), KVList).