 src/couch_views/src/couch_views_fdb.erl           | 114 ++++++++--------------
 src/couch_views/src/couch_views_indexer.erl       |  54 ++++++++++-
 src/couch_views/test/couch_views_indexer_test.erl |  12 ++++----
 3 files changed, 86 insertions(+), 94 deletions(-)
diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl index 47196f7dc..f68dafc41 100644 --- a/src/couch_views/src/couch_views_fdb.erl +++ b/src/couch_views/src/couch_views_fdb.erl @@ -32,8 +32,6 @@ -define(LIST_VALUE, 0). -define(JSON_VALUE, 1). -define(VALUE, 2). --define(MAX_KEY_SIZE_LIMIT, 8000). --define(MAX_VALUE_SIZE_LIMIT, 64000). -include("couch_views.hrl"). @@ -109,7 +107,7 @@ fold_map_idx(TxDb, Sig, ViewId, Options, Callback, Acc0) -> Acc1. -write_doc(TxDb, Sig, _Views, #{deleted := true} = Doc) -> +write_doc(TxDb, Sig, _ViewIds, #{deleted := true} = Doc) -> #{ id := DocId } = Doc, @@ -117,67 +115,40 @@ write_doc(TxDb, Sig, _Views, #{deleted := true} = Doc) -> ExistingViewKeys = get_view_keys(TxDb, Sig, DocId), clear_id_idx(TxDb, Sig, DocId), - lists:foreach(fun(ExistingViewKey) -> - remove_doc_from_idx(TxDb, Sig, DocId, ExistingViewKey) + lists:foreach(fun({ViewId, TotalKeys, TotalSize, UniqueKeys}) -> + clear_map_idx(TxDb, Sig, ViewId, DocId, UniqueKeys), + update_row_count(TxDb, Sig, ViewId, -TotalKeys), + update_kv_size(TxDb, Sig, ViewId, -TotalSize) end, ExistingViewKeys); -write_doc(TxDb, Sig, Views, Doc) -> +write_doc(TxDb, Sig, ViewIds, Doc) -> #{ id := DocId, - results := Results + results := Results, + kv_sizes := KVSizes } = Doc, ExistingViewKeys = get_view_keys(TxDb, Sig, DocId), clear_id_idx(TxDb, Sig, DocId), - lists:foreach(fun({View, NewRows}) -> - #mrview{ - map_names = MNames, - id_num = ViewId - } = View, - - try - NewRowSize = calculate_row_size(NewRows), - update_id_idx(TxDb, Sig, ViewId, DocId, NewRows), - - ExistingKeys = case lists:keyfind(ViewId, 1, ExistingViewKeys) of - {ViewId, TotalRows, TotalSize, EKeys} -> - RowChange = length(NewRows) - TotalRows, - SizeChange = NewRowSize - TotalSize, - update_row_count(TxDb, Sig, ViewId, RowChange), - update_kv_size(TxDb, Sig, ViewId, SizeChange), - EKeys; - false -> - RowChange = length(NewRows), - SizeChange = NewRowSize, - update_row_count(TxDb, 
Sig, ViewId, RowChange), - update_kv_size(TxDb, Sig, ViewId, SizeChange), - [] - end, - update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) - catch - throw:{size_exceeded, Type} -> - case lists:keyfind(ViewId, 1, ExistingViewKeys) of - false -> - ok; - ExistingViewKey -> - remove_doc_from_idx(TxDb, Sig, DocId, ExistingViewKey) - end, - #{ - name := DbName - } = TxDb, - couch_log:error("Db `~s` Doc `~s` exceeded the ~s size " - "for view `~s` and was not indexed.", - [DbName, DocId, Type, MNames]) - end - end, lists:zip(Views, Results)). - - -remove_doc_from_idx(TxDb, Sig, DocId, {ViewId, TotalKeys, TotalSize, - UniqueKeys}) -> - clear_map_idx(TxDb, Sig, ViewId, DocId, UniqueKeys), - update_row_count(TxDb, Sig, ViewId, -TotalKeys), - update_kv_size(TxDb, Sig, ViewId, -TotalSize). + + lists:foreach(fun({ViewId, NewRows, KVSize}) -> + update_id_idx(TxDb, Sig, ViewId, DocId, NewRows, KVSize), + + ExistingKeys = case lists:keyfind(ViewId, 1, ExistingViewKeys) of + {ViewId, TotalRows, TotalSize, EKeys} -> + RowChange = length(NewRows) - TotalRows, + update_row_count(TxDb, Sig, ViewId, RowChange), + update_kv_size(TxDb, Sig, ViewId, KVSize - TotalSize), + EKeys; + false -> + RowChange = length(NewRows), + update_row_count(TxDb, Sig, ViewId, RowChange), + update_kv_size(TxDb, Sig, ViewId, KVSize), + [] + end, + update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) + end, lists:zip3(ViewIds, Results, KVSizes)). % For each row in a map view we store the the key/value @@ -234,7 +205,7 @@ clear_map_idx(TxDb, Sig, ViewId, DocId, ViewKeys) -> end, ViewKeys). 
-update_id_idx(TxDb, Sig, ViewId, DocId, NewRows) -> +update_id_idx(TxDb, Sig, ViewId, DocId, NewRows, KVSize) -> #{ tx := Tx, db_prefix := DbPrefix @@ -243,8 +214,7 @@ update_id_idx(TxDb, Sig, ViewId, DocId, NewRows) -> Unique = lists:usort([K || {K, _V} <- NewRows]), Key = id_idx_key(DbPrefix, Sig, DocId, ViewId), - RowSize = calculate_row_size(NewRows), - Val = couch_views_encoding:encode([length(NewRows), RowSize, Unique]), + Val = couch_views_encoding:encode([length(NewRows), KVSize, Unique]), ok = erlfdb:set(Tx, Key, Val). @@ -377,31 +347,3 @@ process_rows(Rows) -> end, {0, []}, Vals1), Labeled ++ DAcc end, [], Grouped). - - -calculate_row_size(Rows) -> - KeyLimit = key_size_limit(), - ValLimit = value_size_limit(), - - lists:foldl(fun({K, V}, Acc) -> - KeySize = erlang:external_size(K), - ValSize = erlang:external_size(V), - - if KeySize =< KeyLimit -> ok; true -> - throw({size_exceeded, key}) - end, - - if ValSize =< ValLimit -> ok; true -> - throw({size_exceeded, value}) - end, - - Acc + KeySize + ValSize - end, 0, Rows). - - -key_size_limit() -> - config:get_integer("couch_views", "key_size_limit", ?MAX_KEY_SIZE_LIMIT). - - -value_size_limit() -> - config:get_integer("couch_views", "value_size_limit", ?MAX_VALUE_SIZE_LIMIT). diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index 0a57a70ee..83f7e2851 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -29,6 +29,8 @@ % TODO: % * Handle timeouts of transaction and other errors +-define(KEY_SIZE_LIMIT, 8000). +-define(VALUE_SIZE_LIMIT, 64000). spawn_link() -> proc_lib:spawn_link(?MODULE, init, []). 
@@ -297,8 +299,13 @@ write_docs(TxDb, Mrst, Docs, State) -> last_seq := LastSeq } = State, - lists:foreach(fun(Doc) -> - couch_views_fdb:write_doc(TxDb, Sig, Views, Doc) + ViewIds = [View#mrview.id_num || View <- Views], + KeyLimit = key_size_limit(), + ValLimit = value_size_limit(), + + lists:foreach(fun(Doc0) -> + Doc1 = calculate_kv_sizes(Mrst, Doc0, KeyLimit, ValLimit), + couch_views_fdb:write_doc(TxDb, Sig, ViewIds, Doc1) end, Docs), couch_views_fdb:set_update_seq(TxDb, Sig, LastSeq). @@ -368,6 +375,41 @@ start_query_server(#mrst{} = Mrst) -> Mrst. +calculate_kv_sizes(Mrst, Doc, KeyLimit, ValLimit) -> + #mrst{ + db_name = DbName, + idx_name = IdxName + } = Mrst, + #{ + results := Results + } = Doc, + try + KVSizes = lists:map(fun(ViewRows) -> + lists:foldl(fun({K, V}, Acc) -> + KeySize = erlang:external_size(K), + ValSize = erlang:external_size(V), + + if KeySize =< KeyLimit -> ok; true -> + throw({size_error, key}) + end, + + if ValSize =< ValLimit -> ok; true -> + throw({size_error, value}) + end, + + Acc + KeySize + ValSize + end, 0, ViewRows) + end, Results), + Doc#{kv_sizes => KVSizes} + catch throw:{size_error, Type} -> + #{id := DocId} = Doc, + Fmt = "View ~s size error for docid `~s`, excluded from indexing " + "in db `~s` for design doc `~s`", + couch_log:error(Fmt, [Type, DocId, DbName, IdxName]), + Doc#{deleted := true, results := [], kv_sizes => []} + end. + + report_progress(State, UpdateType) -> #{ tx_db := TxDb, @@ -419,3 +461,11 @@ num_changes() -> retry_limit() -> config:get_integer("couch_views", "retry_limit", 3). + + +key_size_limit() -> + config:get_integer("couch_views", "key_size_limit", ?KEY_SIZE_LIMIT). + + +value_size_limit() -> + config:get_integer("couch_views", "value_size_limit", ?VALUE_SIZE_LIMIT). 
diff --git a/src/couch_views/test/couch_views_indexer_test.erl b/src/couch_views/test/couch_views_indexer_test.erl index 17adc42ec..9482fdd85 100644 --- a/src/couch_views/test/couch_views_indexer_test.erl +++ b/src/couch_views/test/couch_views_indexer_test.erl @@ -398,7 +398,7 @@ handle_size_key_limits(Db) -> end), DDoc = create_ddoc(multi_emit_key_limit), - Docs = [doc(1)] ++ [doc(2)], + Docs = [doc(1, 2)] ++ [doc(2, 1)], {ok, _} = fabric2_db:update_docs(Db, [DDoc | Docs], []), @@ -412,12 +412,12 @@ handle_size_key_limits(Db) -> ), ?assertEqual([ - row(<<"1">>, 1, 1) + row(<<"1">>, 2, 2) ], Out), {ok, Doc} = fabric2_db:open_doc(Db, <<"2">>), Doc2 = Doc#doc { - body = {[{<<"val">>,3}]} + body = {[{<<"val">>, 2}]} }, {ok, _} = fabric2_db:update_doc(Db, Doc2), @@ -431,8 +431,8 @@ handle_size_key_limits(Db) -> ), ?assertEqual([ - row(<<"1">>, 1, 1), - row(<<"2">>, 3, 3) + row(<<"1">>, 2, 2), + row(<<"2">>, 2, 2) ], Out1). @@ -558,7 +558,7 @@ create_ddoc(multi_emit_key_limit) -> {<<"views">>, {[ {<<"map_fun1">>, {[ {<<"map">>, <<"function(doc) { " - "if (doc.val === 2) { " + "if (doc.val === 1) { " "emit('a very long string to be limited', doc.val);" "} else {" "emit(doc.val, doc.val)" |