From 6d06dcf8b450259b024217c43ba58f3c67579963 Mon Sep 17 00:00:00 2001 From: Tony Sun Date: Mon, 12 Jun 2017 09:26:49 -0700 Subject: calculate data_size correctly Previously, we were calculating the ExternalSize for views by summing up all the nodes in the btree. Furthermore, this was the compressed size. Now we modify the reduce function to return an ExternalSize for uncompressed values in the KVList. PR: https://github.com/apache/couchdb/pull/608 COUCHDB-3430 --- src/couch/test/couchdb_file_compression_tests.erl | 14 ++++++- src/couch_mrview/src/couch_mrview_util.erl | 47 +++++++++++++++++------ 2 files changed, 49 insertions(+), 12 deletions(-) diff --git a/src/couch/test/couchdb_file_compression_tests.erl b/src/couch/test/couchdb_file_compression_tests.erl index 41d055624..935638614 100644 --- a/src/couch/test/couchdb_file_compression_tests.erl +++ b/src/couch/test/couchdb_file_compression_tests.erl @@ -125,6 +125,7 @@ compare_compression_methods(DbName) -> DbSizeNone = db_disk_size(DbName), ViewSizeNone = view_disk_size(DbName), ExternalSizeNone = db_external_size(DbName), + ViewExternalSizeNone = view_external_size(DbName), config:set("couchdb", "file_compression", "snappy", false), compact_db(DbName), @@ -132,6 +133,7 @@ compare_compression_methods(DbName) -> DbSizeSnappy = db_disk_size(DbName), ViewSizeSnappy = view_disk_size(DbName), ExternalSizeSnappy = db_external_size(DbName), + ViewExternalSizeSnappy = view_external_size(DbName), ?assert(DbSizeNone > DbSizeSnappy), ?assert(ViewSizeNone > ViewSizeSnappy), @@ -151,12 +153,15 @@ compare_compression_methods(DbName) -> DbSizeDeflate9 = db_disk_size(DbName), ViewSizeDeflate9 = view_disk_size(DbName), ExternalSizeDeflate9 = db_external_size(DbName), + ViewExternalSizeDeflate9 = view_external_size(DbName), ?assert(DbSizeDeflate1 > DbSizeDeflate9), ?assert(ViewSizeDeflate1 > ViewSizeDeflate9), ?assert(ExternalSizePreCompact =:= ExternalSizeNone), ?assert(ExternalSizeNone =:= ExternalSizeSnappy), - ?assert(ExternalSizeNone =:= ExternalSizeDeflate9). + ?assert(ExternalSizeNone =:= ExternalSizeDeflate9), + ?assert(ViewExternalSizeNone =:= ViewExternalSizeSnappy), + ?assert(ViewExternalSizeNone =:= ViewExternalSizeDeflate9). populate_db(_Db, NumDocs) when NumDocs =< 0 -> @@ -214,6 +219,13 @@ view_disk_size(DbName) -> ok = couch_db:close(Db), active_size(Info). +view_external_size(DbName) -> + {ok, Db} = couch_db:open_int(DbName, []), + {ok, DDoc} = couch_db:open_doc(Db, ?DDOC_ID, [ejson_body]), + {ok, Info} = couch_mrview:get_info(Db, DDoc), + ok = couch_db:close(Db), + external_size(Info). + active_size(Info) -> couch_util:get_nested_json_value({Info}, [sizes, active]). diff --git a/src/couch_mrview/src/couch_mrview_util.erl b/src/couch_mrview/src/couch_mrview_util.erl index a8462a2d8..632522240 100644 --- a/src/couch_mrview/src/couch_mrview_util.erl +++ b/src/couch_mrview/src/couch_mrview_util.erl @@ -338,7 +338,10 @@ temp_view_to_ddoc({Props}) -> get_row_count(#mrview{btree=Bt}) -> - {ok, {Count, _Reds}} = couch_btree:full_reduce(Bt), + Count = case couch_btree:full_reduce(Bt) of + {ok, {Count0, _Reds, _}} -> Count0; + {ok, {Count0, _Reds}} -> Count0 + end, {ok, Count}. @@ -786,27 +789,33 @@ changes_ekey_opts(_StartSeq, #mrargs{end_key=EKey, end. +reduced_external_size(Tree) -> + case couch_btree:full_reduce(Tree) of + {ok, {_, _, Size}} -> Size; + % return 0 for versions of the reduce function without Size + {ok, {_, _}} -> 0 + end. calculate_external_size(Views) -> SumFun = fun(#mrview{btree=Bt, seq_btree=SBt, key_byseq_btree=KSBt}, Acc) -> - Size0 = sum_btree_sizes(Acc, couch_btree:size(Bt)), + Size0 = sum_btree_sizes(Acc, reduced_external_size(Bt)), Size1 = case SBt of nil -> Size0; - _ -> sum_btree_sizes(Size0, couch_btree:size(SBt)) + _ -> sum_btree_sizes(Size0, reduced_external_size(SBt)) end, case KSBt of nil -> Size1; - _ -> sum_btree_sizes(Size1, couch_btree:size(KSBt)) + _ -> sum_btree_sizes(Size1, reduced_external_size(KSBt)) end end, {ok, lists:foldl(SumFun, 0, Views)}. sum_btree_sizes(nil, _) -> - null; + 0; sum_btree_sizes(_, nil) -> - null; + 0; sum_btree_sizes(Size1, Size2) -> Size1 + Size2. @@ -1038,22 +1047,32 @@ get_user_reds(Reduction) -> element(2, Reduction). +get_external_size_reds(Reduction) when tuple_size(Reduction) == 2 -> + 0; + +get_external_size_reds(Reduction) when tuple_size(Reduction) == 3 -> + element(3, Reduction). + + make_reduce_fun(Lang, ReduceFuns) -> FunSrcs = [FunSrc || {_, FunSrc} <- ReduceFuns], fun (reduce, KVs0) -> KVs = detuple_kvs(expand_dups(KVs0, []), []), {ok, Result} = couch_query_servers:reduce(Lang, FunSrcs, KVs), - {length(KVs), Result}; + ExternalSize = kv_external_size(KVs, Result), + {length(KVs), Result, ExternalSize}; (rereduce, Reds) -> - ExtractFun = fun(Red, {CountsAcc0, URedsAcc0}) -> + ExtractFun = fun(Red, {CountsAcc0, URedsAcc0, ExtAcc0}) -> CountsAcc = CountsAcc0 + get_count(Red), URedsAcc = lists:append(URedsAcc0, [get_user_reds(Red)]), - {CountsAcc, URedsAcc} + ExtAcc = ExtAcc0 + get_external_size_reds(Red), + {CountsAcc, URedsAcc, ExtAcc} end, - {Counts, UReds} = lists:foldl(ExtractFun, {0, []}, Reds), + {Counts, UReds, ExternalSize} = lists:foldl(ExtractFun, + {0, [], 0}, Reds), {ok, Result} = couch_query_servers:rereduce(Lang, FunSrcs, UReds), - {Counts, Result} + {Counts, Result, ExternalSize} end. @@ -1130,3 +1149,9 @@ get_view_queries({Props}) -> _ -> throw({bad_request, "`queries` member must be a array."}) end. + + +kv_external_size(KVList, Reduction) -> + lists:foldl(fun([[Key, _], Value], Acc) -> + ?term_size(Key) + ?term_size(Value) + Acc + end, ?term_size(Reduction), KVList). -- cgit v1.2.1