From d71ed79aa659852400e2f52ab9f5ad0cc6044f1d Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Fri, 13 Mar 2020 16:57:38 -0500 Subject: Fetch existing keys in parallel Fetching the existing keys was a time sink when updating the indices. This moves the key fetching into a single pass for all docs simultaneously. --- src/couch_views/src/couch_views_fdb.erl | 44 +++++++++++++++++------------ src/couch_views/src/couch_views_indexer.erl | 6 +++- 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl index b85f2bd18..482d35fcf 100644 --- a/src/couch_views/src/couch_views_fdb.erl +++ b/src/couch_views/src/couch_views_fdb.erl @@ -21,6 +21,7 @@ fold_map_idx/6, + get_existing_keys/3, write_doc/4 ]). @@ -107,13 +108,34 @@ fold_map_idx(TxDb, Sig, ViewId, Options, Callback, Acc0) -> Acc1. +get_existing_keys(TxDb, Sig, Docs) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + + Futures = lists:map(fun(#{id := DocId}) -> + {Start, End} = id_idx_range(DbPrefix, Sig, DocId), + erlfdb:fold_range_future(Tx, Start, End, []) + end, Docs), + + lists:zipwith(fun(Future, #{id := DocId} = Doc) -> + Entries = erlfdb:fold_range_wait(Tx, Future, fun({K, V}, Acc) -> + {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId, ViewId} = + erlfdb_tuple:unpack(K, DbPrefix), + [TotalKeys, TotalSize, UniqueKeys] = couch_views_encoding:decode(V), + [{ViewId, TotalKeys, TotalSize, UniqueKeys} | Acc] + end, []), + Doc#{existing_keys => lists:sort(Entries)} + end, Futures, Docs). + + write_doc(TxDb, Sig, _ViewIds, #{deleted := true} = Doc) -> #{ - id := DocId + id := DocId, + existing_keys := ExistingViewKeys } = Doc, - ExistingViewKeys = get_view_keys(TxDb, Sig, DocId), - clear_id_idx(TxDb, Sig, DocId), lists:foreach(fun({ViewId, TotalKeys, TotalSize, UniqueKeys}) -> clear_map_idx(TxDb, Sig, ViewId, DocId, UniqueKeys), @@ -124,11 +146,11 @@ write_doc(TxDb, Sig, _ViewIds, #{deleted := true} = Doc) -> write_doc(TxDb, Sig, ViewIds, Doc) -> #{ id := DocId, + existing_keys := ExistingViewKeys, results := Results, kv_sizes := KVSizes } = Doc, - ExistingViewKeys = get_view_keys(TxDb, Sig, DocId), clear_id_idx(TxDb, Sig, DocId), lists:foreach(fun({ViewId, NewRows, KVSize}) -> @@ -246,20 +268,6 @@ update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) -> end, KVsToAdd). -get_view_keys(TxDb, Sig, DocId) -> - #{ - tx := Tx, - db_prefix := DbPrefix - } = TxDb, - {Start, End} = id_idx_range(DbPrefix, Sig, DocId), - lists:map(fun({K, V}) -> - {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId, ViewId} = - erlfdb_tuple:unpack(K, DbPrefix), - [TotalKeys, TotalSize, UniqueKeys] = couch_views_encoding:decode(V), - {ViewId, TotalKeys, TotalSize, UniqueKeys} - end, erlfdb:get_range(Tx, Start, End, [])). - - update_row_count(TxDb, Sig, ViewId, Increment) -> #{ tx := Tx, diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index b787574d0..47860051f 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -310,7 +310,7 @@ map_docs(Mrst, Docs) -> {Mrst1, MappedDocs}. -write_docs(TxDb, Mrst, Docs, State) -> +write_docs(TxDb, Mrst, Docs0, State) -> #mrst{ views = Views, sig = Sig @@ -324,6 +324,10 @@ write_docs(TxDb, Mrst, Docs, State) -> KeyLimit = key_size_limit(), ValLimit = value_size_limit(), + Docs = ctrace:with_span(get_existing_keys, fun() -> + couch_views_fdb:get_existing_keys(TxDb, Sig, Docs0) + end), + lists:foreach(fun(Doc0) -> Doc1 = calculate_kv_sizes(Mrst, Doc0, KeyLimit, ValLimit), Tags = #{doc_id => maps:get(id, Doc0)}, -- cgit v1.2.1