author     Garren Smith <garren.smith@gmail.com>  2020-01-20 12:08:06 +0200
committer  Garren Smith <garren.smith@gmail.com>  2020-03-04 13:23:40 +0200
commit     4bd68d1e03653a0b44ee69818be8e39b511d110d (patch)
tree       f98e6e808b7634385d1da5ac6051230ddaf2efd5
parent     ee150f4251e00e88c00dc38b387c7ee4482f0bf9 (diff)
download   couchdb-4bd68d1e03653a0b44ee69818be8e39b511d110d.tar.gz

Add Key/Value size limit for map indexes

-rw-r--r--  rel/overlay/etc/default.ini                        |   4
-rw-r--r--  src/couch_views/src/couch_views_fdb.erl            | 104
-rw-r--r--  src/couch_views/src/couch_views_indexer.erl        |   4
-rw-r--r--  src/couch_views/test/couch_views_indexer_test.erl  | 154

4 files changed, 227 insertions, 39 deletions
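
In short: each map view now enforces a byte-size limit on emitted keys
(default 8000) and values (default 64000). A condensed, illustrative sketch
of the mechanism (the names mirror the diff below, but this is not the
committed code verbatim): every row a document emits is sized before any
index writes, and an oversized key or value raises a throw that write_doc/4
catches in order to skip the document, de-index any rows it emitted
previously, and log an error.

%% Illustrative sketch: size every emitted row up front so one
%% oversized key or value aborts indexing of the whole document.
check_rows(Rows, KeyLimit, ValLimit) ->
    lists:foreach(fun({K, V}) ->
        erlang:external_size(K) =< KeyLimit orelse throw({size_exceeded, key}),
        erlang:external_size(V) =< ValLimit orelse throw({size_exceeded, value})
    end, Rows).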
diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini
index f2a81875c..4c978b29c 100644
--- a/rel/overlay/etc/default.ini
+++ b/rel/overlay/etc/default.ini
@@ -272,6 +272,10 @@ iterations = 10 ; iterations for password hashing
; Settings for view indexing
[couch_views]
; max_workers = 100
+; The maximum allowed key size emitted from a view for a document (in bytes)
+; key_size_limit = 8000
+; The maximum allowed value size emitted from a view for a document (in bytes)
+; value_size_limit = 64000
; CSP (Content Security Policy) Support for _utils
[csp]
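
The new settings ship commented out at their compiled-in defaults (8000 and
64000 bytes). Because couch_views reads them through the config application
on every size check, an operator can presumably also adjust them at runtime
rather than editing the ini, for example:

%% Assumed runtime override via CouchDB's config app; config:set/3
%% takes string values and persists to the local ini by default.
ok = config:set("couch_views", "key_size_limit", "4000").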
diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl
index 5edaa3a5f..47196f7dc 100644
--- a/src/couch_views/src/couch_views_fdb.erl
+++ b/src/couch_views/src/couch_views_fdb.erl
@@ -32,6 +32,8 @@
-define(LIST_VALUE, 0).
-define(JSON_VALUE, 1).
-define(VALUE, 2).
+-define(MAX_KEY_SIZE_LIMIT, 8000).
+-define(MAX_VALUE_SIZE_LIMIT, 64000).
-include("couch_views.hrl").
@@ -107,7 +109,7 @@ fold_map_idx(TxDb, Sig, ViewId, Options, Callback, Acc0) ->
Acc1.
-write_doc(TxDb, Sig, _ViewIds, #{deleted := true} = Doc) ->
+write_doc(TxDb, Sig, _Views, #{deleted := true} = Doc) ->
#{
id := DocId
} = Doc,
@@ -115,13 +117,11 @@ write_doc(TxDb, Sig, _ViewIds, #{deleted := true} = Doc) ->
ExistingViewKeys = get_view_keys(TxDb, Sig, DocId),
clear_id_idx(TxDb, Sig, DocId),
- lists:foreach(fun({ViewId, TotalKeys, TotalSize, UniqueKeys}) ->
- clear_map_idx(TxDb, Sig, ViewId, DocId, UniqueKeys),
- update_row_count(TxDb, Sig, ViewId, -TotalKeys),
- update_kv_size(TxDb, Sig, ViewId, -TotalSize)
+ lists:foreach(fun(ExistingViewKey) ->
+ remove_doc_from_idx(TxDb, Sig, DocId, ExistingViewKey)
end, ExistingViewKeys);
-write_doc(TxDb, Sig, ViewIds, Doc) ->
+write_doc(TxDb, Sig, Views, Doc) ->
#{
id := DocId,
results := Results
@@ -130,26 +130,54 @@ write_doc(TxDb, Sig, ViewIds, Doc) ->
ExistingViewKeys = get_view_keys(TxDb, Sig, DocId),
clear_id_idx(TxDb, Sig, DocId),
-
- lists:foreach(fun({ViewId, NewRows}) ->
- update_id_idx(TxDb, Sig, ViewId, DocId, NewRows),
-
- ExistingKeys = case lists:keyfind(ViewId, 1, ExistingViewKeys) of
- {ViewId, TotalRows, TotalSize, EKeys} ->
- RowChange = length(NewRows) - TotalRows,
- SizeChange = calculate_row_size(NewRows) - TotalSize,
- update_row_count(TxDb, Sig, ViewId, RowChange),
- update_kv_size(TxDb, Sig, ViewId, SizeChange),
- EKeys;
- false ->
- RowChange = length(NewRows),
- SizeChange = calculate_row_size(NewRows),
- update_row_count(TxDb, Sig, ViewId, RowChange),
- update_kv_size(TxDb, Sig, ViewId, SizeChange),
- []
- end,
- update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows)
- end, lists:zip(ViewIds, Results)).
+ lists:foreach(fun({View, NewRows}) ->
+ #mrview{
+ map_names = MNames,
+ id_num = ViewId
+ } = View,
+
+ try
+ NewRowSize = calculate_row_size(NewRows),
+ update_id_idx(TxDb, Sig, ViewId, DocId, NewRows),
+
+ ExistingKeys = case lists:keyfind(ViewId, 1, ExistingViewKeys) of
+ {ViewId, TotalRows, TotalSize, EKeys} ->
+ RowChange = length(NewRows) - TotalRows,
+ SizeChange = NewRowSize - TotalSize,
+ update_row_count(TxDb, Sig, ViewId, RowChange),
+ update_kv_size(TxDb, Sig, ViewId, SizeChange),
+ EKeys;
+ false ->
+ RowChange = length(NewRows),
+ SizeChange = NewRowSize,
+ update_row_count(TxDb, Sig, ViewId, RowChange),
+ update_kv_size(TxDb, Sig, ViewId, SizeChange),
+ []
+ end,
+ update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows)
+ catch
+ throw:{size_exceeded, Type} ->
+ case lists:keyfind(ViewId, 1, ExistingViewKeys) of
+ false ->
+ ok;
+ ExistingViewKey ->
+ remove_doc_from_idx(TxDb, Sig, DocId, ExistingViewKey)
+ end,
+ #{
+ name := DbName
+ } = TxDb,
+ couch_log:error("Db `~s` Doc `~s` exceeded the ~s size "
+ "for view `~s` and was not indexed.",
+ [DbName, DocId, Type, MNames])
+ end
+ end, lists:zip(Views, Results)).
+
+
+remove_doc_from_idx(TxDb, Sig, DocId, {ViewId, TotalKeys, TotalSize,
+ UniqueKeys}) ->
+ clear_map_idx(TxDb, Sig, ViewId, DocId, UniqueKeys),
+ update_row_count(TxDb, Sig, ViewId, -TotalKeys),
+ update_kv_size(TxDb, Sig, ViewId, -TotalSize).
% For each row in a map view we store the key/value
@@ -352,6 +380,28 @@ process_rows(Rows) ->
calculate_row_size(Rows) ->
+ KeyLimit = key_size_limit(),
+ ValLimit = value_size_limit(),
+
lists:foldl(fun({K, V}, Acc) ->
- Acc + erlang:external_size(K) + erlang:external_size(V)
+ KeySize = erlang:external_size(K),
+ ValSize = erlang:external_size(V),
+
+ if KeySize =< KeyLimit -> ok; true ->
+ throw({size_exceeded, key})
+ end,
+
+ if ValSize =< ValLimit -> ok; true ->
+ throw({size_exceeded, value})
+ end,
+
+ Acc + KeySize + ValSize
end, 0, Rows).
+
+
+key_size_limit() ->
+ config:get_integer("couch_views", "key_size_limit", ?MAX_KEY_SIZE_LIMIT).
+
+
+value_size_limit() ->
+ config:get_integer("couch_views", "value_size_limit", ?MAX_VALUE_SIZE_LIMIT).
diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl
index 31cd8e6f1..0a57a70ee 100644
--- a/src/couch_views/src/couch_views_indexer.erl
+++ b/src/couch_views/src/couch_views_indexer.erl
@@ -297,10 +297,8 @@ write_docs(TxDb, Mrst, Docs, State) ->
last_seq := LastSeq
} = State,
- ViewIds = [View#mrview.id_num || View <- Views],
-
lists:foreach(fun(Doc) ->
- couch_views_fdb:write_doc(TxDb, Sig, ViewIds, Doc)
+ couch_views_fdb:write_doc(TxDb, Sig, Views, Doc)
end, Docs),
couch_views_fdb:set_update_seq(TxDb, Sig, LastSeq).
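
The indexer now hands the full #mrview records to
couch_views_fdb:write_doc/4 instead of flattening them to bare id_nums, so
the storage layer can key its writes by id_num and still name the offending
map functions when it logs a size failure. An illustrative use of the two
record fields involved (assumes couch_mrview.hrl is included for #mrview):

%% Illustrative only: both fields the fdb layer needs come from the
%% same record the indexer previously reduced to an integer id.
describe_view(#mrview{id_num = Id, map_names = Names}) ->
    lists:flatten(io_lib:format("view ~b (~p)", [Id, Names])).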
diff --git a/src/couch_views/test/couch_views_indexer_test.erl b/src/couch_views/test/couch_views_indexer_test.erl
index 3070cc0a1..17adc42ec 100644
--- a/src/couch_views/test/couch_views_indexer_test.erl
+++ b/src/couch_views/test/couch_views_indexer_test.erl
@@ -30,15 +30,17 @@ indexer_test_() ->
fun foreach_setup/0,
fun foreach_teardown/1,
[
- with([?TDEF(indexed_empty_db)]),
- with([?TDEF(indexed_single_doc)]),
- with([?TDEF(updated_docs_are_reindexed)]),
- with([?TDEF(updated_docs_without_changes_are_reindexed)]),
- with([?TDEF(deleted_docs_not_indexed)]),
- with([?TDEF(deleted_docs_are_unindexed)]),
- with([?TDEF(multipe_docs_with_same_key)]),
- with([?TDEF(multipe_keys_from_same_doc)]),
- with([?TDEF(multipe_identical_keys_from_same_doc)])
+ ?TDEF_FE(indexed_empty_db),
+ ?TDEF_FE(indexed_single_doc),
+ ?TDEF_FE(updated_docs_are_reindexed),
+ ?TDEF_FE(updated_docs_without_changes_are_reindexed),
+ ?TDEF_FE(deleted_docs_not_indexed),
+ ?TDEF_FE(deleted_docs_are_unindexed),
+ ?TDEF_FE(multipe_docs_with_same_key),
+ ?TDEF_FE(multipe_keys_from_same_doc),
+ ?TDEF_FE(multipe_identical_keys_from_same_doc),
+ ?TDEF_FE(handle_size_key_limits),
+ ?TDEF_FE(handle_size_value_limits)
]
}
}
@@ -65,6 +67,7 @@ foreach_setup() ->
foreach_teardown(Db) ->
+ meck:unload(),
ok = fabric2_db:delete(fabric2_db:name(Db), []).
@@ -385,6 +388,113 @@ multipe_identical_keys_from_same_doc(Db) ->
], Out).
+handle_size_key_limits(Db) ->
+ ok = meck:new(config, [passthrough]),
+ ok = meck:expect(config, get_integer, fun(Section, Key, Default) ->
+ case Section == "couch_views" andalso Key == "key_size_limit" of
+ true -> 15;
+ _ -> Default
+ end
+ end),
+
+ DDoc = create_ddoc(multi_emit_key_limit),
+ Docs = [doc(1)] ++ [doc(2)],
+
+ {ok, _} = fabric2_db:update_docs(Db, [DDoc | Docs], []),
+
+ {ok, Out} = couch_views:query(
+ Db,
+ DDoc,
+ <<"map_fun1">>,
+ fun fold_fun/2,
+ [],
+ #mrargs{}
+ ),
+
+ ?assertEqual([
+ row(<<"1">>, 1, 1)
+ ], Out),
+
+ {ok, Doc} = fabric2_db:open_doc(Db, <<"2">>),
+ Doc2 = Doc#doc {
+ body = {[{<<"val">>,3}]}
+ },
+ {ok, _} = fabric2_db:update_doc(Db, Doc2),
+
+ {ok, Out1} = couch_views:query(
+ Db,
+ DDoc,
+ <<"map_fun1">>,
+ fun fold_fun/2,
+ [],
+ #mrargs{}
+ ),
+
+ ?assertEqual([
+ row(<<"1">>, 1, 1),
+ row(<<"2">>, 3, 3)
+ ], Out1).
+
+
+handle_size_value_limits(Db) ->
+ ok = meck:new(config, [passthrough]),
+ ok = meck:expect(config, get_integer, fun(Section, _, Default) ->
+ case Section of
+ "couch_views" -> 15;
+ _ -> Default
+ end
+ end),
+
+ DDoc = create_ddoc(multi_emit_key_limit),
+ Docs = [doc(1, 2)] ++ [doc(2, 3)],
+
+ {ok, _} = fabric2_db:update_docs(Db, [DDoc | Docs], []),
+
+ {ok, Out} = couch_views:query(
+ Db,
+ DDoc,
+ <<"map_fun2">>,
+ fun fold_fun/2,
+ [],
+ #mrargs{}
+ ),
+
+ ?assertEqual([
+ row(<<"1">>, 2, 2),
+ row(<<"2">>, 3, 3),
+ row(<<"1">>, 22, 2),
+ row(<<"2">>, 23, 3)
+ ], Out),
+
+
+ {ok, Doc} = fabric2_db:open_doc(Db, <<"1">>),
+ Doc2 = Doc#doc {
+ body = {[{<<"val">>,1}]}
+ },
+ {ok, _} = fabric2_db:update_doc(Db, Doc2),
+
+ {ok, Out1} = couch_views:query(
+ Db,
+ DDoc,
+ <<"map_fun2">>,
+ fun fold_fun/2,
+ [],
+ #mrargs{}
+ ),
+
+ ?assertEqual([
+ row(<<"2">>, 3, 3),
+ row(<<"2">>, 23, 3)
+ ], Out1).
+
+
+row(Id, Key, Value) ->
+ {row, [
+ {id, Id},
+ {key, Key},
+ {value, Value}
+ ]}.
+
fold_fun({meta, _Meta}, Acc) ->
{ok, Acc};
fold_fun({row, _} = Row, Acc) ->
@@ -440,6 +550,32 @@ create_ddoc(multi_emit_same) ->
{<<"map">>, <<"function(doc) {}">>}
]}}
]}}
+ ]});
+
+create_ddoc(multi_emit_key_limit) ->
+ couch_doc:from_json_obj({[
+ {<<"_id">>, <<"_design/bar">>},
+ {<<"views">>, {[
+ {<<"map_fun1">>, {[
+ {<<"map">>, <<"function(doc) { "
+ "if (doc.val === 2) { "
+ "emit('a very long string to be limited', doc.val);"
+ "} else {"
+ "emit(doc.val, doc.val)"
+ "}"
+ "}">>}
+ ]}},
+ {<<"map_fun2">>, {[
+ {<<"map">>, <<"function(doc) { "
+ "emit(doc.val + 20, doc.val);"
+ "if (doc.val === 1) { "
+ "emit(doc.val, 'a very long string to be limited');"
+ "} else {"
+ "emit(doc.val, doc.val)"
+ "}"
+ "}">>}
+ ]}}
+ ]}}
]}).
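
Both tests rely on the same stubbing pattern: meck's passthrough mode keeps
every other config read intact while the limit under test is pinned to a
tiny value, and foreach_teardown/1 unloads the mock after each case. A
minimal sketch of that pattern outside the suite (assumes meck is
available, as it is in CouchDB's test environment):

%% Minimal sketch: pin one couch_views limit, leave all other
%% config:get_integer/3 reads at their real defaults.
ok = meck:new(config, [passthrough]),
ok = meck:expect(config, get_integer,
    fun("couch_views", "value_size_limit", _Default) -> 15;
       (_Section, _Key, Default) -> Default
    end),
%% ... exercise the indexer under the reduced limit ...
ok = meck:unload(config).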