diff options
author | Garren Smith <garren.smith@gmail.com> | 2020-01-20 12:08:06 +0200 |
---|---|---|
committer | Garren Smith <garren.smith@gmail.com> | 2020-03-04 13:23:40 +0200 |
commit | 4bd68d1e03653a0b44ee69818be8e39b511d110d (patch) | |
tree | f98e6e808b7634385d1da5ac6051230ddaf2efd5 | |
parent | ee150f4251e00e88c00dc38b387c7ee4482f0bf9 (diff) | |
download | couchdb-4bd68d1e03653a0b44ee69818be8e39b511d110d.tar.gz |
Add Key/Value size limit for map indexes
-rw-r--r-- | rel/overlay/etc/default.ini | 4 | ||||
-rw-r--r-- | src/couch_views/src/couch_views_fdb.erl | 104 | ||||
-rw-r--r-- | src/couch_views/src/couch_views_indexer.erl | 4 | ||||
-rw-r--r-- | src/couch_views/test/couch_views_indexer_test.erl | 154 |
4 files changed, 227 insertions, 39 deletions
diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini index f2a81875c..4c978b29c 100644 --- a/rel/overlay/etc/default.ini +++ b/rel/overlay/etc/default.ini @@ -272,6 +272,10 @@ iterations = 10 ; iterations for password hashing ; Settings for view indexing [couch_views] ; max_workers = 100 +; The maximum allowed key size emitted from a view for a document (in bytes) +; key_size_limit = 8000 +; The maximum allowed value size emitted from a view for a document (in bytes) +; value_size_limit = 64000 ; CSP (Content Security Policy) Support for _utils [csp] diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl index 5edaa3a5f..47196f7dc 100644 --- a/src/couch_views/src/couch_views_fdb.erl +++ b/src/couch_views/src/couch_views_fdb.erl @@ -32,6 +32,8 @@ -define(LIST_VALUE, 0). -define(JSON_VALUE, 1). -define(VALUE, 2). +-define(MAX_KEY_SIZE_LIMIT, 8000). +-define(MAX_VALUE_SIZE_LIMIT, 64000). -include("couch_views.hrl"). @@ -107,7 +109,7 @@ fold_map_idx(TxDb, Sig, ViewId, Options, Callback, Acc0) -> Acc1. -write_doc(TxDb, Sig, _ViewIds, #{deleted := true} = Doc) -> +write_doc(TxDb, Sig, _Views, #{deleted := true} = Doc) -> #{ id := DocId } = Doc, @@ -115,13 +117,11 @@ write_doc(TxDb, Sig, _ViewIds, #{deleted := true} = Doc) -> ExistingViewKeys = get_view_keys(TxDb, Sig, DocId), clear_id_idx(TxDb, Sig, DocId), - lists:foreach(fun({ViewId, TotalKeys, TotalSize, UniqueKeys}) -> - clear_map_idx(TxDb, Sig, ViewId, DocId, UniqueKeys), - update_row_count(TxDb, Sig, ViewId, -TotalKeys), - update_kv_size(TxDb, Sig, ViewId, -TotalSize) + lists:foreach(fun(ExistingViewKey) -> + remove_doc_from_idx(TxDb, Sig, DocId, ExistingViewKey) end, ExistingViewKeys); -write_doc(TxDb, Sig, ViewIds, Doc) -> +write_doc(TxDb, Sig, Views, Doc) -> #{ id := DocId, results := Results @@ -130,26 +130,54 @@ write_doc(TxDb, Sig, ViewIds, Doc) -> ExistingViewKeys = get_view_keys(TxDb, Sig, DocId), clear_id_idx(TxDb, Sig, DocId), - - lists:foreach(fun({ViewId, NewRows}) -> - update_id_idx(TxDb, Sig, ViewId, DocId, NewRows), - - ExistingKeys = case lists:keyfind(ViewId, 1, ExistingViewKeys) of - {ViewId, TotalRows, TotalSize, EKeys} -> - RowChange = length(NewRows) - TotalRows, - SizeChange = calculate_row_size(NewRows) - TotalSize, - update_row_count(TxDb, Sig, ViewId, RowChange), - update_kv_size(TxDb, Sig, ViewId, SizeChange), - EKeys; - false -> - RowChange = length(NewRows), - SizeChange = calculate_row_size(NewRows), - update_row_count(TxDb, Sig, ViewId, RowChange), - update_kv_size(TxDb, Sig, ViewId, SizeChange), - [] - end, - update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) - end, lists:zip(ViewIds, Results)). + lists:foreach(fun({View, NewRows}) -> + #mrview{ + map_names = MNames, + id_num = ViewId + } = View, + + try + NewRowSize = calculate_row_size(NewRows), + update_id_idx(TxDb, Sig, ViewId, DocId, NewRows), + + ExistingKeys = case lists:keyfind(ViewId, 1, ExistingViewKeys) of + {ViewId, TotalRows, TotalSize, EKeys} -> + RowChange = length(NewRows) - TotalRows, + SizeChange = NewRowSize - TotalSize, + update_row_count(TxDb, Sig, ViewId, RowChange), + update_kv_size(TxDb, Sig, ViewId, SizeChange), + EKeys; + false -> + RowChange = length(NewRows), + SizeChange = NewRowSize, + update_row_count(TxDb, Sig, ViewId, RowChange), + update_kv_size(TxDb, Sig, ViewId, SizeChange), + [] + end, + update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) + catch + throw:{size_exceeded, Type} -> + case lists:keyfind(ViewId, 1, ExistingViewKeys) of + false -> + ok; + ExistingViewKey -> + remove_doc_from_idx(TxDb, Sig, DocId, ExistingViewKey) + end, + #{ + name := DbName + } = TxDb, + couch_log:error("Db `~s` Doc `~s` exceeded the ~s size " + "for view `~s` and was not indexed.", + [DbName, DocId, Type, MNames]) + end + end, lists:zip(Views, Results)). + + +remove_doc_from_idx(TxDb, Sig, DocId, {ViewId, TotalKeys, TotalSize, + UniqueKeys}) -> + clear_map_idx(TxDb, Sig, ViewId, DocId, UniqueKeys), + update_row_count(TxDb, Sig, ViewId, -TotalKeys), + update_kv_size(TxDb, Sig, ViewId, -TotalSize). % For each row in a map view we store the the key/value @@ -352,6 +380,28 @@ process_rows(Rows) -> calculate_row_size(Rows) -> + KeyLimit = key_size_limit(), + ValLimit = value_size_limit(), + lists:foldl(fun({K, V}, Acc) -> - Acc + erlang:external_size(K) + erlang:external_size(V) + KeySize = erlang:external_size(K), + ValSize = erlang:external_size(V), + + if KeySize =< KeyLimit -> ok; true -> + throw({size_exceeded, key}) + end, + + if ValSize =< ValLimit -> ok; true -> + throw({size_exceeded, value}) + end, + + Acc + KeySize + ValSize end, 0, Rows). + + +key_size_limit() -> + config:get_integer("couch_views", "key_size_limit", ?MAX_KEY_SIZE_LIMIT). + + +value_size_limit() -> + config:get_integer("couch_views", "value_size_limit", ?MAX_VALUE_SIZE_LIMIT). diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index 31cd8e6f1..0a57a70ee 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -297,10 +297,8 @@ write_docs(TxDb, Mrst, Docs, State) -> last_seq := LastSeq } = State, - ViewIds = [View#mrview.id_num || View <- Views], - lists:foreach(fun(Doc) -> - couch_views_fdb:write_doc(TxDb, Sig, ViewIds, Doc) + couch_views_fdb:write_doc(TxDb, Sig, Views, Doc) end, Docs), couch_views_fdb:set_update_seq(TxDb, Sig, LastSeq). diff --git a/src/couch_views/test/couch_views_indexer_test.erl b/src/couch_views/test/couch_views_indexer_test.erl index 3070cc0a1..17adc42ec 100644 --- a/src/couch_views/test/couch_views_indexer_test.erl +++ b/src/couch_views/test/couch_views_indexer_test.erl @@ -30,15 +30,17 @@ indexer_test_() -> fun foreach_setup/0, fun foreach_teardown/1, [ - with([?TDEF(indexed_empty_db)]), - with([?TDEF(indexed_single_doc)]), - with([?TDEF(updated_docs_are_reindexed)]), - with([?TDEF(updated_docs_without_changes_are_reindexed)]), - with([?TDEF(deleted_docs_not_indexed)]), - with([?TDEF(deleted_docs_are_unindexed)]), - with([?TDEF(multipe_docs_with_same_key)]), - with([?TDEF(multipe_keys_from_same_doc)]), - with([?TDEF(multipe_identical_keys_from_same_doc)]) + ?TDEF_FE(indexed_empty_db), + ?TDEF_FE(indexed_single_doc), + ?TDEF_FE(updated_docs_are_reindexed), + ?TDEF_FE(updated_docs_without_changes_are_reindexed), + ?TDEF_FE(deleted_docs_not_indexed), + ?TDEF_FE(deleted_docs_are_unindexed), + ?TDEF_FE(multipe_docs_with_same_key), + ?TDEF_FE(multipe_keys_from_same_doc), + ?TDEF_FE(multipe_identical_keys_from_same_doc), + ?TDEF_FE(handle_size_key_limits), + ?TDEF_FE(handle_size_value_limits) ] } } @@ -65,6 +67,7 @@ foreach_setup() -> foreach_teardown(Db) -> + meck:unload(), ok = fabric2_db:delete(fabric2_db:name(Db), []). @@ -385,6 +388,113 @@ multipe_identical_keys_from_same_doc(Db) -> ], Out). +handle_size_key_limits(Db) -> + ok = meck:new(config, [passthrough]), + ok = meck:expect(config, get_integer, fun(Section, Key, Default) -> + case Section == "couch_views" andalso Key == "key_size_limit" of + true -> 15; + _ -> Default + end + end), + + DDoc = create_ddoc(multi_emit_key_limit), + Docs = [doc(1)] ++ [doc(2)], + + {ok, _} = fabric2_db:update_docs(Db, [DDoc | Docs], []), + + {ok, Out} = couch_views:query( + Db, + DDoc, + <<"map_fun1">>, + fun fold_fun/2, + [], + #mrargs{} + ), + + ?assertEqual([ + row(<<"1">>, 1, 1) + ], Out), + + {ok, Doc} = fabric2_db:open_doc(Db, <<"2">>), + Doc2 = Doc#doc { + body = {[{<<"val">>,3}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc2), + + {ok, Out1} = couch_views:query( + Db, + DDoc, + <<"map_fun1">>, + fun fold_fun/2, + [], + #mrargs{} + ), + + ?assertEqual([ + row(<<"1">>, 1, 1), + row(<<"2">>, 3, 3) + ], Out1). + + +handle_size_value_limits(Db) -> + ok = meck:new(config, [passthrough]), + ok = meck:expect(config, get_integer, fun(Section, _, Default) -> + case Section of + "couch_views" -> 15; + _ -> Default + end + end), + + DDoc = create_ddoc(multi_emit_key_limit), + Docs = [doc(1, 2)] ++ [doc(2, 3)], + + {ok, _} = fabric2_db:update_docs(Db, [DDoc | Docs], []), + + {ok, Out} = couch_views:query( + Db, + DDoc, + <<"map_fun2">>, + fun fold_fun/2, + [], + #mrargs{} + ), + + ?assertEqual([ + row(<<"1">>, 2, 2), + row(<<"2">>, 3, 3), + row(<<"1">>, 22, 2), + row(<<"2">>, 23, 3) + ], Out), + + + {ok, Doc} = fabric2_db:open_doc(Db, <<"1">>), + Doc2 = Doc#doc { + body = {[{<<"val">>,1}]} + }, + {ok, _} = fabric2_db:update_doc(Db, Doc2), + + {ok, Out1} = couch_views:query( + Db, + DDoc, + <<"map_fun2">>, + fun fold_fun/2, + [], + #mrargs{} + ), + + ?assertEqual([ + row(<<"2">>, 3, 3), + row(<<"2">>, 23, 3) + ], Out1). + + +row(Id, Key, Value) -> + {row, [ + {id, Id}, + {key, Key}, + {value, Value} + ]}. + fold_fun({meta, _Meta}, Acc) -> {ok, Acc}; fold_fun({row, _} = Row, Acc) -> @@ -440,6 +550,32 @@ create_ddoc(multi_emit_same) -> {<<"map">>, <<"function(doc) {}">>} ]}} ]}} + ]}); + +create_ddoc(multi_emit_key_limit) -> + couch_doc:from_json_obj({[ + {<<"_id">>, <<"_design/bar">>}, + {<<"views">>, {[ + {<<"map_fun1">>, {[ + {<<"map">>, <<"function(doc) { " + "if (doc.val === 2) { " + "emit('a very long string to be limited', doc.val);" + "} else {" + "emit(doc.val, doc.val)" + "}" + "}">>} + ]}}, + {<<"map_fun2">>, {[ + {<<"map">>, <<"function(doc) { " + "emit(doc.val + 20, doc.val);" + "if (doc.val === 1) { " + "emit(doc.val, 'a very long string to be limited');" + "} else {" + "emit(doc.val, doc.val)" + "}" + "}">>} + ]}} + ]}} ]}). |