summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Adam Kocoloski <kocolosk@apache.org> 2018-05-28 09:55:09 -0400
committer Adam Kocoloski <kocolosk@apache.org> 2018-05-28 09:55:09 -0400
commit ebf808d3e4e462f46fb0af7dd453bc10668bdb04 (patch)
tree ba57aca6f421ac87bfbe86075afdbc91c3b5b931
parent 25bc44e805e2d685bee633ef9bfa833eda5b3796 (diff)
download couchdb-ebf808d3e4e462f46fb0af7dd453bc10668bdb04.tar.gz
Squash ee32cd58 to rebase COUCHDB-2971 work
commit ee32cd5825aaf63448651c9521f0927083d2281e
Author: Adam Kocoloski <kocolosk@apache.org>
Date:   Wed Mar 1 09:28:45 2017 -0500

    Add a cardinality estimator builtin reduce

    This introduces a _distinct builtin reduce function, which uses a
    HyperLogLog algorithm to estimate the number of distinct keys in the
    view index. The precision is currently fixed to 2^11 observables and
    therefore uses approximately 1.5 KB of memory.

    COUCHDB-2971
-rw-r--r-- src/couch/src/couch_query_servers.erl 23
1 file changed, 22 insertions, 1 deletion
diff --git a/src/couch/src/couch_query_servers.erl b/src/couch/src/couch_query_servers.erl
index f31d24c6c..42b0a4de7 100644
--- a/src/couch/src/couch_query_servers.erl
+++ b/src/couch/src/couch_query_servers.erl
@@ -17,6 +17,7 @@
-export([reduce/3, rereduce/3,validate_doc_update/5]).
-export([filter_docs/5]).
-export([filter_view/3]).
+-export([finalize/1]).
-export([rewrite/3]).
-export([with_ddoc_proc/2, proc_prompt/2, ddoc_prompt/3, ddoc_proc_prompt/3, json_doc/1]).
@@ -86,6 +87,16 @@ group_reductions_results(List) ->
[Heads | group_reductions_results(Tails)]
end.
+finalize(Reductions) ->
+ {ok, lists:map(fun(Reduction) ->
+ case hyper:is_hyper(Reduction) of
+ true ->
+ hyper:card(Reduction);
+ false ->
+ Reduction
+ end
+ end, Reductions)}.
+
rereduce(_Lang, [], _ReducedValues) ->
{ok, []};
rereduce(Lang, RedSrcs, ReducedValues) ->
@@ -171,7 +182,10 @@ builtin_reduce(rereduce, [<<"_count",_/binary>>|BuiltinReds], KVs, Acc) ->
builtin_reduce(rereduce, BuiltinReds, KVs, [Count|Acc]);
builtin_reduce(Re, [<<"_stats",_/binary>>|BuiltinReds], KVs, Acc) ->
Stats = builtin_stats(Re, KVs),
- builtin_reduce(Re, BuiltinReds, KVs, [Stats|Acc]).
+ builtin_reduce(Re, BuiltinReds, KVs, [Stats|Acc]);
+builtin_reduce(Re, [<<"_distinct",_/binary>>|BuiltinReds], KVs, Acc) ->
+ Distinct = count_distinct_keys(Re, KVs),
+ builtin_reduce(Re, BuiltinReds, KVs, [Distinct|Acc]).
builtin_sum_rows([], Acc) ->
@@ -303,6 +317,13 @@ get_number(Key, Props) ->
throw({invalid_value, iolist_to_binary(Msg)})
end.
+% TODO allow customization of precision in the ddoc.
+count_distinct_keys(reduce, KVs) ->
+ lists:foldl(fun([[Key, _Id], _Value], Filter) ->
+ hyper:insert(term_to_binary(Key), Filter)
+ end, hyper:new(11), KVs);
+count_distinct_keys(rereduce, Reds) ->
+ hyper:union([Filter || [_, Filter] <- Reds]).
% use the function stored in ddoc.validate_doc_update to test an update.
-spec validate_doc_update(DDoc, EditDoc, DiskDoc, Ctx, SecObj) -> ok when