summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul J. Davis <paul.joseph.davis@gmail.com>2018-08-22 14:54:31 -0500
committerPaul J. Davis <paul.joseph.davis@gmail.com>2018-08-23 13:57:11 -0500
commit2d141af2ccea6370d70b52996d68392b505f48f9 (patch)
treeaa24b8a0255c13e732a9bafbeb8371e3eccb6a67
parent2350cd4e0602a3ef4d0fa79096294e558ddaf475 (diff)
downloadcouchdb-fix-builtin-sum.tar.gz
Fix builtin _sum reduce functionfix-builtin-sum
The builting _sum reduce function has no protection against overflowing reduce values. Users can emit objects with enough unique keys to cause the builtin _sum to create objects that are exceedingly large in the inner nodes of the view B+Tree. This change adds the same logic that applies to JavaScript reduce functions to check if a reduce function is properly reducing its input.
-rw-r--r--src/couch/src/couch_query_servers.erl27
-rw-r--r--src/couch/test/couch_query_servers_tests.erl95
2 files changed, 121 insertions, 1 deletions
diff --git a/src/couch/src/couch_query_servers.erl b/src/couch/src/couch_query_servers.erl
index 7047364e2..c6d255f17 100644
--- a/src/couch/src/couch_query_servers.erl
+++ b/src/couch/src/couch_query_servers.erl
@@ -171,7 +171,8 @@ builtin_reduce(_Re, [], _KVs, Acc) ->
{ok, lists:reverse(Acc)};
builtin_reduce(Re, [<<"_sum",_/binary>>|BuiltinReds], KVs, Acc) ->
Sum = builtin_sum_rows(KVs, 0),
- builtin_reduce(Re, BuiltinReds, KVs, [Sum|Acc]);
+ Red = check_sum_overflow(?term_size(KVs), ?term_size(Sum), Sum),
+ builtin_reduce(Re, BuiltinReds, KVs, [Red|Acc]);
builtin_reduce(reduce, [<<"_count",_/binary>>|BuiltinReds], KVs, Acc) ->
Count = length(KVs),
builtin_reduce(reduce, BuiltinReds, KVs, [Count|Acc]);
@@ -247,6 +248,30 @@ sum_arrays([X|Xs], [Y|Ys]) when is_number(X), is_number(Y) ->
sum_arrays(Else, _) ->
throw_sum_error(Else).
+check_sum_overflow(InSize, OutSize, Sum) ->
+ Overflowed = OutSize > 4906 andalso OutSize * 2 > InSize,
+ case config:get("query_server_config", "reduce_limit", "true") of
+ "true" when Overflowed ->
+ Msg = log_sum_overflow(InSize, OutSize),
+ {[
+ {<<"error">>, <<"builtin_reduce_error">>},
+ {<<"reason">>, Msg}
+ ]};
+ "log" when Overflowed ->
+ log_sum_overflow(InSize, OutSize),
+ Sum;
+ _ ->
+ Sum
+ end.
+
+log_sum_overflow(InSize, OutSize) ->
+ Fmt = "Reduce output must shrink more rapidly: "
+ "input size: ~b "
+ "output size: ~b",
+ Msg = iolist_to_binary(io_lib:format(Fmt, [InSize, OutSize])),
+ couch_log:error(Msg, []),
+ Msg.
+
builtin_stats(_, []) ->
{0, 0, 0, 0, 0};
builtin_stats(_, [[_,First]|Rest]) ->
diff --git a/src/couch/test/couch_query_servers_tests.erl b/src/couch/test/couch_query_servers_tests.erl
new file mode 100644
index 000000000..f8df896c4
--- /dev/null
+++ b/src/couch/test/couch_query_servers_tests.erl
@@ -0,0 +1,95 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_query_servers_tests).
+
+-include_lib("couch/include/couch_eunit.hrl").
+
+
+setup() ->
+ meck:new([config, couch_log]).
+
+
+teardown(_) ->
+ meck:unload().
+
+
+sum_overflow_test_() ->
+ {
+ "Test overflow detection in the _sum reduce function",
+ {
+ setup,
+ fun setup/0,
+ fun teardown/1,
+ [
+ fun should_return_error_on_overflow/0,
+ fun should_return_object_on_log/0,
+ fun should_return_object_on_false/0
+ ]
+ }
+ }.
+
+
+should_return_error_on_overflow() ->
+ meck:reset([config, couch_log]),
+ meck:expect(
+ config, get, ["query_server_config", "reduce_limit", "true"],
+ "true"
+ ),
+ meck:expect(couch_log, error, ['_', '_'], ok),
+ KVs = gen_sum_kvs(),
+ {ok, [Result]} = couch_query_servers:reduce(<<"foo">>, [<<"_sum">>], KVs),
+ ?assertMatch({[{<<"error">>, <<"builtin_reduce_error">>} | _]}, Result),
+ ?assert(meck:called(config, get, '_')),
+ ?assert(meck:called(couch_log, error, '_')).
+
+
+should_return_object_on_log() ->
+ meck:reset([config, couch_log]),
+ meck:expect(
+ config, get, ["query_server_config", "reduce_limit", "true"],
+ "log"
+ ),
+ meck:expect(couch_log, error, ['_', '_'], ok),
+ KVs = gen_sum_kvs(),
+ {ok, [Result]} = couch_query_servers:reduce(<<"foo">>, [<<"_sum">>], KVs),
+ ?assertMatch({[_ | _]}, Result),
+ Keys = [K || {K, _} <- element(1, Result)],
+ ?assert(not lists:member(<<"error">>, Keys)),
+ ?assert(meck:called(config, get, '_')),
+ ?assert(meck:called(couch_log, error, '_')).
+
+
+should_return_object_on_false() ->
+ meck:reset([config, couch_log]),
+ meck:expect(
+ config, get, ["query_server_config", "reduce_limit", "true"],
+ "false"
+ ),
+ meck:expect(couch_log, error, ['_', '_'], ok),
+ KVs = gen_sum_kvs(),
+ {ok, [Result]} = couch_query_servers:reduce(<<"foo">>, [<<"_sum">>], KVs),
+ ?assertMatch({[_ | _]}, Result),
+ Keys = [K || {K, _} <- element(1, Result)],
+ ?assert(not lists:member(<<"error">>, Keys)),
+ ?assert(meck:called(config, get, '_')),
+ ?assertNot(meck:called(couch_log, error, '_')).
+
+
+gen_sum_kvs() ->
+ lists:map(fun(I) ->
+ Props = lists:map(fun(_) ->
+ K = couch_util:encodeBase64Url(crypto:strong_rand_bytes(16)),
+ {K, 1}
+ end, lists:seq(1, 20)),
+ [I, {Props}]
+ end, lists:seq(1, 10)).