diff options
author | Adam Kocoloski <kocolosk@apache.org> | 2018-05-28 09:55:09 -0400 |
---|---|---|
committer | Adam Kocoloski <kocolosk@apache.org> | 2018-05-28 09:55:09 -0400 |
commit | ebf808d3e4e462f46fb0af7dd453bc10668bdb04 (patch) | |
tree | ba57aca6f421ac87bfbe86075afdbc91c3b5b931 | |
parent | 25bc44e805e2d685bee633ef9bfa833eda5b3796 (diff) | |
download | couchdb-ebf808d3e4e462f46fb0af7dd453bc10668bdb04.tar.gz |
Squash ee32cd58 to rebase COUCHDB-2971 work
commit ee32cd5825aaf63448651c9521f0927083d2281e
Author: Adam Kocoloski <kocolosk@apache.org>
Date: Wed Mar 1 09:28:45 2017 -0500
Add a cardinality estimator builtin reduce
This introduces a _distinct builtin reduce function, which uses a
HyperLogLog algorithm to estimate the number of distinct keys in the
view index. The precision is currently fixed at 2^11 observables, so
each estimator uses approximately 1.5 KB of memory.
COUCHDB-2971
-rw-r--r-- | src/couch/src/couch_query_servers.erl | 23 |
1 file changed, 22 insertions, 1 deletion
diff --git a/src/couch/src/couch_query_servers.erl b/src/couch/src/couch_query_servers.erl
index f31d24c6c..42b0a4de7 100644
--- a/src/couch/src/couch_query_servers.erl
+++ b/src/couch/src/couch_query_servers.erl
@@ -17,6 +17,7 @@
 -export([reduce/3, rereduce/3,validate_doc_update/5]).
 -export([filter_docs/5]).
 -export([filter_view/3]).
+-export([finalize/1]).
 -export([rewrite/3]).
 
 -export([with_ddoc_proc/2, proc_prompt/2, ddoc_prompt/3, ddoc_proc_prompt/3, json_doc/1]).
@@ -86,6 +87,16 @@ group_reductions_results(List) ->
             [Heads | group_reductions_results(Tails)]
     end.
 
+finalize(Reductions) ->
+    {ok, lists:map(fun(Reduction) ->
+        case hyper:is_hyper(Reduction) of
+            true ->
+                hyper:card(Reduction);
+            false ->
+                Reduction
+        end
+    end, Reductions)}.
+
 rereduce(_Lang, [], _ReducedValues) ->
     {ok, []};
 rereduce(Lang, RedSrcs, ReducedValues) ->
@@ -171,7 +182,10 @@ builtin_reduce(rereduce, [<<"_count",_/binary>>|BuiltinReds], KVs, Acc) ->
     builtin_reduce(rereduce, BuiltinReds, KVs, [Count|Acc]);
 builtin_reduce(Re, [<<"_stats",_/binary>>|BuiltinReds], KVs, Acc) ->
     Stats = builtin_stats(Re, KVs),
-    builtin_reduce(Re, BuiltinReds, KVs, [Stats|Acc]).
+    builtin_reduce(Re, BuiltinReds, KVs, [Stats|Acc]);
+builtin_reduce(Re, [<<"_distinct",_/binary>>|BuiltinReds], KVs, Acc) ->
+    Distinct = count_distinct_keys(Re, KVs),
+    builtin_reduce(Re, BuiltinReds, KVs, [Distinct|Acc]).
 
 
 builtin_sum_rows([], Acc) ->
@@ -303,6 +317,13 @@ get_number(Key, Props) ->
         throw({invalid_value, iolist_to_binary(Msg)})
     end.
 
+% TODO allow customization of precision in the ddoc.
+count_distinct_keys(reduce, KVs) ->
+    lists:foldl(fun([[Key, _Id], _Value], Filter) ->
+        hyper:insert(term_to_binary(Key), Filter)
+    end, hyper:new(11), KVs);
+count_distinct_keys(rereduce, Reds) ->
+    hyper:union([Filter || [_, Filter] <- Reds]).
 
 % use the function stored in ddoc.validate_doc_update to test an update.
 -spec validate_doc_update(DDoc, EditDoc, DiskDoc, Ctx, SecObj) -> ok when