summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul J. Davis <paul.joseph.davis@gmail.com>2020-03-04 15:28:23 -0600
committerPaul J. Davis <paul.joseph.davis@gmail.com>2020-03-04 15:38:01 -0600
commit06d57dfac024936f063db71600e7b09b1ed81128 (patch)
tree164249271c5e5d33c99806e5e25fadd624a55c96
parent735b3f67b31c55753b618f14a60c644dc088fdda (diff)
downloadcouchdb-prototype/fdb-layer-cleanup-view-size-limits.tar.gz
Clean up view size limit enforcement (branch: prototype/fdb-layer-cleanup-view-size-limits)
Both a performance and style cleanup. This reduces the number of ets calls by roughly `2 * 2 * $num_docs * $num_views`. The first factor of two comes from avoiding re-calculating the size separately for both the id and map indexes. The second factor of two is because we have to look up two different settings. Stylistically, this also moves the logic out of the fdb modules. The key/value structure and update logic are already pretty complicated, so for our sanity we should avoid mixing external application logic into those modules as much as possible. The behavior is also slightly changed vs. the original patch. Originally, only the view that contained the errant key or value was affected. However, the pre-existing behavior was that a document which fails to be mapped correctly is excluded from all view indexes defined in the design document. That behavior has been restored.
-rw-r--r--src/couch_views/src/couch_views_fdb.erl114
-rw-r--r--src/couch_views/src/couch_views_indexer.erl54
-rw-r--r--src/couch_views/test/couch_views_indexer_test.erl12
3 files changed, 86 insertions, 94 deletions
diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl
index 47196f7dc..f68dafc41 100644
--- a/src/couch_views/src/couch_views_fdb.erl
+++ b/src/couch_views/src/couch_views_fdb.erl
@@ -32,8 +32,6 @@
-define(LIST_VALUE, 0).
-define(JSON_VALUE, 1).
-define(VALUE, 2).
--define(MAX_KEY_SIZE_LIMIT, 8000).
--define(MAX_VALUE_SIZE_LIMIT, 64000).
-include("couch_views.hrl").
@@ -109,7 +107,7 @@ fold_map_idx(TxDb, Sig, ViewId, Options, Callback, Acc0) ->
Acc1.
-write_doc(TxDb, Sig, _Views, #{deleted := true} = Doc) ->
+write_doc(TxDb, Sig, _ViewIds, #{deleted := true} = Doc) ->
#{
id := DocId
} = Doc,
@@ -117,67 +115,40 @@ write_doc(TxDb, Sig, _Views, #{deleted := true} = Doc) ->
ExistingViewKeys = get_view_keys(TxDb, Sig, DocId),
clear_id_idx(TxDb, Sig, DocId),
- lists:foreach(fun(ExistingViewKey) ->
- remove_doc_from_idx(TxDb, Sig, DocId, ExistingViewKey)
+ lists:foreach(fun({ViewId, TotalKeys, TotalSize, UniqueKeys}) ->
+ clear_map_idx(TxDb, Sig, ViewId, DocId, UniqueKeys),
+ update_row_count(TxDb, Sig, ViewId, -TotalKeys),
+ update_kv_size(TxDb, Sig, ViewId, -TotalSize)
end, ExistingViewKeys);
-write_doc(TxDb, Sig, Views, Doc) ->
+write_doc(TxDb, Sig, ViewIds, Doc) ->
#{
id := DocId,
- results := Results
+ results := Results,
+ kv_sizes := KVSizes
} = Doc,
ExistingViewKeys = get_view_keys(TxDb, Sig, DocId),
clear_id_idx(TxDb, Sig, DocId),
- lists:foreach(fun({View, NewRows}) ->
- #mrview{
- map_names = MNames,
- id_num = ViewId
- } = View,
-
- try
- NewRowSize = calculate_row_size(NewRows),
- update_id_idx(TxDb, Sig, ViewId, DocId, NewRows),
-
- ExistingKeys = case lists:keyfind(ViewId, 1, ExistingViewKeys) of
- {ViewId, TotalRows, TotalSize, EKeys} ->
- RowChange = length(NewRows) - TotalRows,
- SizeChange = NewRowSize - TotalSize,
- update_row_count(TxDb, Sig, ViewId, RowChange),
- update_kv_size(TxDb, Sig, ViewId, SizeChange),
- EKeys;
- false ->
- RowChange = length(NewRows),
- SizeChange = NewRowSize,
- update_row_count(TxDb, Sig, ViewId, RowChange),
- update_kv_size(TxDb, Sig, ViewId, SizeChange),
- []
- end,
- update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows)
- catch
- throw:{size_exceeded, Type} ->
- case lists:keyfind(ViewId, 1, ExistingViewKeys) of
- false ->
- ok;
- ExistingViewKey ->
- remove_doc_from_idx(TxDb, Sig, DocId, ExistingViewKey)
- end,
- #{
- name := DbName
- } = TxDb,
- couch_log:error("Db `~s` Doc `~s` exceeded the ~s size "
- "for view `~s` and was not indexed.",
- [DbName, DocId, Type, MNames])
- end
- end, lists:zip(Views, Results)).
-
-
-remove_doc_from_idx(TxDb, Sig, DocId, {ViewId, TotalKeys, TotalSize,
- UniqueKeys}) ->
- clear_map_idx(TxDb, Sig, ViewId, DocId, UniqueKeys),
- update_row_count(TxDb, Sig, ViewId, -TotalKeys),
- update_kv_size(TxDb, Sig, ViewId, -TotalSize).
+
+ lists:foreach(fun({ViewId, NewRows, KVSize}) ->
+ update_id_idx(TxDb, Sig, ViewId, DocId, NewRows, KVSize),
+
+ ExistingKeys = case lists:keyfind(ViewId, 1, ExistingViewKeys) of
+ {ViewId, TotalRows, TotalSize, EKeys} ->
+ RowChange = length(NewRows) - TotalRows,
+ update_row_count(TxDb, Sig, ViewId, RowChange),
+ update_kv_size(TxDb, Sig, ViewId, KVSize - TotalSize),
+ EKeys;
+ false ->
+ RowChange = length(NewRows),
+ update_row_count(TxDb, Sig, ViewId, RowChange),
+ update_kv_size(TxDb, Sig, ViewId, KVSize),
+ []
+ end,
+ update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows)
+ end, lists:zip3(ViewIds, Results, KVSizes)).
% For each row in a map view we store the the key/value
@@ -234,7 +205,7 @@ clear_map_idx(TxDb, Sig, ViewId, DocId, ViewKeys) ->
end, ViewKeys).
-update_id_idx(TxDb, Sig, ViewId, DocId, NewRows) ->
+update_id_idx(TxDb, Sig, ViewId, DocId, NewRows, KVSize) ->
#{
tx := Tx,
db_prefix := DbPrefix
@@ -243,8 +214,7 @@ update_id_idx(TxDb, Sig, ViewId, DocId, NewRows) ->
Unique = lists:usort([K || {K, _V} <- NewRows]),
Key = id_idx_key(DbPrefix, Sig, DocId, ViewId),
- RowSize = calculate_row_size(NewRows),
- Val = couch_views_encoding:encode([length(NewRows), RowSize, Unique]),
+ Val = couch_views_encoding:encode([length(NewRows), KVSize, Unique]),
ok = erlfdb:set(Tx, Key, Val).
@@ -377,31 +347,3 @@ process_rows(Rows) ->
end, {0, []}, Vals1),
Labeled ++ DAcc
end, [], Grouped).
-
-
-calculate_row_size(Rows) ->
- KeyLimit = key_size_limit(),
- ValLimit = value_size_limit(),
-
- lists:foldl(fun({K, V}, Acc) ->
- KeySize = erlang:external_size(K),
- ValSize = erlang:external_size(V),
-
- if KeySize =< KeyLimit -> ok; true ->
- throw({size_exceeded, key})
- end,
-
- if ValSize =< ValLimit -> ok; true ->
- throw({size_exceeded, value})
- end,
-
- Acc + KeySize + ValSize
- end, 0, Rows).
-
-
-key_size_limit() ->
- config:get_integer("couch_views", "key_size_limit", ?MAX_KEY_SIZE_LIMIT).
-
-
-value_size_limit() ->
- config:get_integer("couch_views", "value_size_limit", ?MAX_VALUE_SIZE_LIMIT).
diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl
index 0a57a70ee..83f7e2851 100644
--- a/src/couch_views/src/couch_views_indexer.erl
+++ b/src/couch_views/src/couch_views_indexer.erl
@@ -29,6 +29,8 @@
% TODO:
% * Handle timeouts of transaction and other errors
+-define(KEY_SIZE_LIMIT, 8000).
+-define(VALUE_SIZE_LIMIT, 64000).
spawn_link() ->
proc_lib:spawn_link(?MODULE, init, []).
@@ -297,8 +299,13 @@ write_docs(TxDb, Mrst, Docs, State) ->
last_seq := LastSeq
} = State,
- lists:foreach(fun(Doc) ->
- couch_views_fdb:write_doc(TxDb, Sig, Views, Doc)
+ ViewIds = [View#mrview.id_num || View <- Views],
+ KeyLimit = key_size_limit(),
+ ValLimit = value_size_limit(),
+
+ lists:foreach(fun(Doc0) ->
+ Doc1 = calculate_kv_sizes(Mrst, Doc0, KeyLimit, ValLimit),
+ couch_views_fdb:write_doc(TxDb, Sig, ViewIds, Doc1)
end, Docs),
couch_views_fdb:set_update_seq(TxDb, Sig, LastSeq).
@@ -368,6 +375,41 @@ start_query_server(#mrst{} = Mrst) ->
Mrst.
+calculate_kv_sizes(Mrst, Doc, KeyLimit, ValLimit) ->
+ #mrst{
+ db_name = DbName,
+ idx_name = IdxName
+ } = Mrst,
+ #{
+ results := Results
+ } = Doc,
+ try
+ KVSizes = lists:map(fun(ViewRows) ->
+ lists:foldl(fun({K, V}, Acc) ->
+ KeySize = erlang:external_size(K),
+ ValSize = erlang:external_size(V),
+
+ if KeySize =< KeyLimit -> ok; true ->
+ throw({size_error, key})
+ end,
+
+ if ValSize =< ValLimit -> ok; true ->
+ throw({size_error, value})
+ end,
+
+ Acc + KeySize + ValSize
+ end, 0, ViewRows)
+ end, Results),
+ Doc#{kv_sizes => KVSizes}
+ catch throw:{size_error, Type} ->
+ #{id := DocId} = Doc,
+ Fmt = "View ~s size error for docid `~s`, excluded from indexing "
+ "in db `~s` for design doc `~s`",
+ couch_log:error(Fmt, [Type, DocId, DbName, IdxName]),
+ Doc#{deleted := true, results := [], kv_sizes => []}
+ end.
+
+
report_progress(State, UpdateType) ->
#{
tx_db := TxDb,
@@ -419,3 +461,11 @@ num_changes() ->
retry_limit() ->
config:get_integer("couch_views", "retry_limit", 3).
+
+
+key_size_limit() ->
+ config:get_integer("couch_views", "key_size_limit", ?KEY_SIZE_LIMIT).
+
+
+value_size_limit() ->
+ config:get_integer("couch_views", "value_size_limit", ?VALUE_SIZE_LIMIT).
diff --git a/src/couch_views/test/couch_views_indexer_test.erl b/src/couch_views/test/couch_views_indexer_test.erl
index 17adc42ec..9482fdd85 100644
--- a/src/couch_views/test/couch_views_indexer_test.erl
+++ b/src/couch_views/test/couch_views_indexer_test.erl
@@ -398,7 +398,7 @@ handle_size_key_limits(Db) ->
end),
DDoc = create_ddoc(multi_emit_key_limit),
- Docs = [doc(1)] ++ [doc(2)],
+ Docs = [doc(1, 2)] ++ [doc(2, 1)],
{ok, _} = fabric2_db:update_docs(Db, [DDoc | Docs], []),
@@ -412,12 +412,12 @@ handle_size_key_limits(Db) ->
),
?assertEqual([
- row(<<"1">>, 1, 1)
+ row(<<"1">>, 2, 2)
], Out),
{ok, Doc} = fabric2_db:open_doc(Db, <<"2">>),
Doc2 = Doc#doc {
- body = {[{<<"val">>,3}]}
+ body = {[{<<"val">>, 2}]}
},
{ok, _} = fabric2_db:update_doc(Db, Doc2),
@@ -431,8 +431,8 @@ handle_size_key_limits(Db) ->
),
?assertEqual([
- row(<<"1">>, 1, 1),
- row(<<"2">>, 3, 3)
+ row(<<"1">>, 2, 2),
+ row(<<"2">>, 2, 2)
], Out1).
@@ -558,7 +558,7 @@ create_ddoc(multi_emit_key_limit) ->
{<<"views">>, {[
{<<"map_fun1">>, {[
{<<"map">>, <<"function(doc) { "
- "if (doc.val === 2) { "
+ "if (doc.val === 1) { "
"emit('a very long string to be limited', doc.val);"
"} else {"
"emit(doc.val, doc.val)"