diff options
author | Paul J. Davis <paul.joseph.davis@gmail.com> | 2020-03-04 15:28:23 -0600 |
---|---|---|
committer | Paul J. Davis <paul.joseph.davis@gmail.com> | 2020-03-05 11:13:14 -0600 |
commit | 2fe1666c590d98298e2b57b079d47394e74a9bca (patch) | |
tree | 164249271c5e5d33c99806e5e25fadd624a55c96 | |
parent | 735b3f67b31c55753b618f14a60c644dc088fdda (diff) | |
download | couchdb-2fe1666c590d98298e2b57b079d47394e74a9bca.tar.gz |
Clean up view size limit enforcement
Both a performance and style cleanup. This reduces the number of ets
calls by roughly `2 * 2 * $num_docs * $num_views`. The first factor of
two is because this avoids re-calculating the size twice for both the id
and map indexes. The second factor of two is because we have to lookup
two different settings.
Stylistically this also moves the logic out of the fdb modules. Keeping
the key/value structure and update logic correct is already pretty
complicated, so we should avoid mixing external application logic into
those modules as much as possible for our sanity.
The behavior is slightly changed vs the original patch as well.
Originally only the view that contained the errant key or value was
affected. However, pre-existing behavior where a document fails to be
mapped correctly resulted in it being excluded from all view indexes
defined in the design document. That behavior has been restored.
-rw-r--r-- | src/couch_views/src/couch_views_fdb.erl | 114 | ||||
-rw-r--r-- | src/couch_views/src/couch_views_indexer.erl | 54 | ||||
-rw-r--r-- | src/couch_views/test/couch_views_indexer_test.erl | 12 |
3 files changed, 86 insertions, 94 deletions
diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl index 47196f7dc..f68dafc41 100644 --- a/src/couch_views/src/couch_views_fdb.erl +++ b/src/couch_views/src/couch_views_fdb.erl @@ -32,8 +32,6 @@ -define(LIST_VALUE, 0). -define(JSON_VALUE, 1). -define(VALUE, 2). --define(MAX_KEY_SIZE_LIMIT, 8000). --define(MAX_VALUE_SIZE_LIMIT, 64000). -include("couch_views.hrl"). @@ -109,7 +107,7 @@ fold_map_idx(TxDb, Sig, ViewId, Options, Callback, Acc0) -> Acc1. -write_doc(TxDb, Sig, _Views, #{deleted := true} = Doc) -> +write_doc(TxDb, Sig, _ViewIds, #{deleted := true} = Doc) -> #{ id := DocId } = Doc, @@ -117,67 +115,40 @@ write_doc(TxDb, Sig, _Views, #{deleted := true} = Doc) -> ExistingViewKeys = get_view_keys(TxDb, Sig, DocId), clear_id_idx(TxDb, Sig, DocId), - lists:foreach(fun(ExistingViewKey) -> - remove_doc_from_idx(TxDb, Sig, DocId, ExistingViewKey) + lists:foreach(fun({ViewId, TotalKeys, TotalSize, UniqueKeys}) -> + clear_map_idx(TxDb, Sig, ViewId, DocId, UniqueKeys), + update_row_count(TxDb, Sig, ViewId, -TotalKeys), + update_kv_size(TxDb, Sig, ViewId, -TotalSize) end, ExistingViewKeys); -write_doc(TxDb, Sig, Views, Doc) -> +write_doc(TxDb, Sig, ViewIds, Doc) -> #{ id := DocId, - results := Results + results := Results, + kv_sizes := KVSizes } = Doc, ExistingViewKeys = get_view_keys(TxDb, Sig, DocId), clear_id_idx(TxDb, Sig, DocId), - lists:foreach(fun({View, NewRows}) -> - #mrview{ - map_names = MNames, - id_num = ViewId - } = View, - - try - NewRowSize = calculate_row_size(NewRows), - update_id_idx(TxDb, Sig, ViewId, DocId, NewRows), - - ExistingKeys = case lists:keyfind(ViewId, 1, ExistingViewKeys) of - {ViewId, TotalRows, TotalSize, EKeys} -> - RowChange = length(NewRows) - TotalRows, - SizeChange = NewRowSize - TotalSize, - update_row_count(TxDb, Sig, ViewId, RowChange), - update_kv_size(TxDb, Sig, ViewId, SizeChange), - EKeys; - false -> - RowChange = length(NewRows), - SizeChange = NewRowSize, - update_row_count(TxDb, 
Sig, ViewId, RowChange), - update_kv_size(TxDb, Sig, ViewId, SizeChange), - [] - end, - update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) - catch - throw:{size_exceeded, Type} -> - case lists:keyfind(ViewId, 1, ExistingViewKeys) of - false -> - ok; - ExistingViewKey -> - remove_doc_from_idx(TxDb, Sig, DocId, ExistingViewKey) - end, - #{ - name := DbName - } = TxDb, - couch_log:error("Db `~s` Doc `~s` exceeded the ~s size " - "for view `~s` and was not indexed.", - [DbName, DocId, Type, MNames]) - end - end, lists:zip(Views, Results)). - - -remove_doc_from_idx(TxDb, Sig, DocId, {ViewId, TotalKeys, TotalSize, - UniqueKeys}) -> - clear_map_idx(TxDb, Sig, ViewId, DocId, UniqueKeys), - update_row_count(TxDb, Sig, ViewId, -TotalKeys), - update_kv_size(TxDb, Sig, ViewId, -TotalSize). + + lists:foreach(fun({ViewId, NewRows, KVSize}) -> + update_id_idx(TxDb, Sig, ViewId, DocId, NewRows, KVSize), + + ExistingKeys = case lists:keyfind(ViewId, 1, ExistingViewKeys) of + {ViewId, TotalRows, TotalSize, EKeys} -> + RowChange = length(NewRows) - TotalRows, + update_row_count(TxDb, Sig, ViewId, RowChange), + update_kv_size(TxDb, Sig, ViewId, KVSize - TotalSize), + EKeys; + false -> + RowChange = length(NewRows), + update_row_count(TxDb, Sig, ViewId, RowChange), + update_kv_size(TxDb, Sig, ViewId, KVSize), + [] + end, + update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) + end, lists:zip3(ViewIds, Results, KVSizes)). % For each row in a map view we store the the key/value @@ -234,7 +205,7 @@ clear_map_idx(TxDb, Sig, ViewId, DocId, ViewKeys) -> end, ViewKeys). 
-update_id_idx(TxDb, Sig, ViewId, DocId, NewRows) -> +update_id_idx(TxDb, Sig, ViewId, DocId, NewRows, KVSize) -> #{ tx := Tx, db_prefix := DbPrefix @@ -243,8 +214,7 @@ update_id_idx(TxDb, Sig, ViewId, DocId, NewRows) -> Unique = lists:usort([K || {K, _V} <- NewRows]), Key = id_idx_key(DbPrefix, Sig, DocId, ViewId), - RowSize = calculate_row_size(NewRows), - Val = couch_views_encoding:encode([length(NewRows), RowSize, Unique]), + Val = couch_views_encoding:encode([length(NewRows), KVSize, Unique]), ok = erlfdb:set(Tx, Key, Val). @@ -377,31 +347,3 @@ process_rows(Rows) -> end, {0, []}, Vals1), Labeled ++ DAcc end, [], Grouped). - - -calculate_row_size(Rows) -> - KeyLimit = key_size_limit(), - ValLimit = value_size_limit(), - - lists:foldl(fun({K, V}, Acc) -> - KeySize = erlang:external_size(K), - ValSize = erlang:external_size(V), - - if KeySize =< KeyLimit -> ok; true -> - throw({size_exceeded, key}) - end, - - if ValSize =< ValLimit -> ok; true -> - throw({size_exceeded, value}) - end, - - Acc + KeySize + ValSize - end, 0, Rows). - - -key_size_limit() -> - config:get_integer("couch_views", "key_size_limit", ?MAX_KEY_SIZE_LIMIT). - - -value_size_limit() -> - config:get_integer("couch_views", "value_size_limit", ?MAX_VALUE_SIZE_LIMIT). diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index 0a57a70ee..83f7e2851 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -29,6 +29,8 @@ % TODO: % * Handle timeouts of transaction and other errors +-define(KEY_SIZE_LIMIT, 8000). +-define(VALUE_SIZE_LIMIT, 64000). spawn_link() -> proc_lib:spawn_link(?MODULE, init, []). 
@@ -297,8 +299,13 @@ write_docs(TxDb, Mrst, Docs, State) -> last_seq := LastSeq } = State, - lists:foreach(fun(Doc) -> - couch_views_fdb:write_doc(TxDb, Sig, Views, Doc) + ViewIds = [View#mrview.id_num || View <- Views], + KeyLimit = key_size_limit(), + ValLimit = value_size_limit(), + + lists:foreach(fun(Doc0) -> + Doc1 = calculate_kv_sizes(Mrst, Doc0, KeyLimit, ValLimit), + couch_views_fdb:write_doc(TxDb, Sig, ViewIds, Doc1) end, Docs), couch_views_fdb:set_update_seq(TxDb, Sig, LastSeq). @@ -368,6 +375,41 @@ start_query_server(#mrst{} = Mrst) -> Mrst. +calculate_kv_sizes(Mrst, Doc, KeyLimit, ValLimit) -> + #mrst{ + db_name = DbName, + idx_name = IdxName + } = Mrst, + #{ + results := Results + } = Doc, + try + KVSizes = lists:map(fun(ViewRows) -> + lists:foldl(fun({K, V}, Acc) -> + KeySize = erlang:external_size(K), + ValSize = erlang:external_size(V), + + if KeySize =< KeyLimit -> ok; true -> + throw({size_error, key}) + end, + + if ValSize =< ValLimit -> ok; true -> + throw({size_error, value}) + end, + + Acc + KeySize + ValSize + end, 0, ViewRows) + end, Results), + Doc#{kv_sizes => KVSizes} + catch throw:{size_error, Type} -> + #{id := DocId} = Doc, + Fmt = "View ~s size error for docid `~s`, excluded from indexing " + "in db `~s` for design doc `~s`", + couch_log:error(Fmt, [Type, DocId, DbName, IdxName]), + Doc#{deleted := true, results := [], kv_sizes => []} + end. + + report_progress(State, UpdateType) -> #{ tx_db := TxDb, @@ -419,3 +461,11 @@ num_changes() -> retry_limit() -> config:get_integer("couch_views", "retry_limit", 3). + + +key_size_limit() -> + config:get_integer("couch_views", "key_size_limit", ?KEY_SIZE_LIMIT). + + +value_size_limit() -> + config:get_integer("couch_views", "value_size_limit", ?VALUE_SIZE_LIMIT). 
diff --git a/src/couch_views/test/couch_views_indexer_test.erl b/src/couch_views/test/couch_views_indexer_test.erl index 17adc42ec..9482fdd85 100644 --- a/src/couch_views/test/couch_views_indexer_test.erl +++ b/src/couch_views/test/couch_views_indexer_test.erl @@ -398,7 +398,7 @@ handle_size_key_limits(Db) -> end), DDoc = create_ddoc(multi_emit_key_limit), - Docs = [doc(1)] ++ [doc(2)], + Docs = [doc(1, 2)] ++ [doc(2, 1)], {ok, _} = fabric2_db:update_docs(Db, [DDoc | Docs], []), @@ -412,12 +412,12 @@ handle_size_key_limits(Db) -> ), ?assertEqual([ - row(<<"1">>, 1, 1) + row(<<"1">>, 2, 2) ], Out), {ok, Doc} = fabric2_db:open_doc(Db, <<"2">>), Doc2 = Doc#doc { - body = {[{<<"val">>,3}]} + body = {[{<<"val">>, 2}]} }, {ok, _} = fabric2_db:update_doc(Db, Doc2), @@ -431,8 +431,8 @@ handle_size_key_limits(Db) -> ), ?assertEqual([ - row(<<"1">>, 1, 1), - row(<<"2">>, 3, 3) + row(<<"1">>, 2, 2), + row(<<"2">>, 2, 2) ], Out1). @@ -558,7 +558,7 @@ create_ddoc(multi_emit_key_limit) -> {<<"views">>, {[ {<<"map_fun1">>, {[ {<<"map">>, <<"function(doc) { " - "if (doc.val === 2) { " + "if (doc.val === 1) { " "emit('a very long string to be limited', doc.val);" "} else {" "emit(doc.val, doc.val)" |