diff options
author | Paul J. Davis <paul.joseph.davis@gmail.com> | 2020-03-13 16:57:38 -0500 |
---|---|---|
committer | Paul J. Davis <paul.joseph.davis@gmail.com> | 2020-03-13 17:05:55 -0500 |
commit | d71ed79aa659852400e2f52ab9f5ad0cc6044f1d (patch) | |
tree | c3bfc4fad86992857c2172f3910a12aabc07bd3f | |
parent | 308762be8658153be1d58e3f8378b75a5b84d7d9 (diff) | |
download | couchdb-d71ed79aa659852400e2f52ab9f5ad0cc6044f1d.tar.gz |
Fetch existing keys in parallel (branches: archive/prototype/fdb-layer-view-tracing, prototype/fdb-layer-view-tracing)
Fetching the existing keys was a time sink when updating the indices.
This moves the key fetching into a single pass: the range reads for all
docs are issued up front as futures and then awaited, instead of one blocking read per doc.
-rw-r--r-- | src/couch_views/src/couch_views_fdb.erl | 44 | ||||
-rw-r--r-- | src/couch_views/src/couch_views_indexer.erl | 6 |
2 files changed, 31 insertions, 19 deletions
diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl index b85f2bd18..482d35fcf 100644 --- a/src/couch_views/src/couch_views_fdb.erl +++ b/src/couch_views/src/couch_views_fdb.erl @@ -21,6 +21,7 @@ fold_map_idx/6, + get_existing_keys/3, write_doc/4 ]). @@ -107,13 +108,34 @@ fold_map_idx(TxDb, Sig, ViewId, Options, Callback, Acc0) -> Acc1. +get_existing_keys(TxDb, Sig, Docs) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + + Futures = lists:map(fun(#{id := DocId}) -> + {Start, End} = id_idx_range(DbPrefix, Sig, DocId), + erlfdb:fold_range_future(Tx, Start, End, []) + end, Docs), + + lists:zipwith(fun(Future, #{id := DocId} = Doc) -> + Entries = erlfdb:fold_range_wait(Tx, Future, fun({K, V}, Acc) -> + {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId, ViewId} = + erlfdb_tuple:unpack(K, DbPrefix), + [TotalKeys, TotalSize, UniqueKeys] = couch_views_encoding:decode(V), + [{ViewId, TotalKeys, TotalSize, UniqueKeys} | Acc] + end, []), + Doc#{existing_keys => lists:sort(Entries)} + end, Futures, Docs). + + write_doc(TxDb, Sig, _ViewIds, #{deleted := true} = Doc) -> #{ - id := DocId + id := DocId, + existing_keys := ExistingViewKeys } = Doc, - ExistingViewKeys = get_view_keys(TxDb, Sig, DocId), - clear_id_idx(TxDb, Sig, DocId), lists:foreach(fun({ViewId, TotalKeys, TotalSize, UniqueKeys}) -> clear_map_idx(TxDb, Sig, ViewId, DocId, UniqueKeys), @@ -124,11 +146,11 @@ write_doc(TxDb, Sig, _ViewIds, #{deleted := true} = Doc) -> write_doc(TxDb, Sig, ViewIds, Doc) -> #{ id := DocId, + existing_keys := ExistingViewKeys, results := Results, kv_sizes := KVSizes } = Doc, - ExistingViewKeys = get_view_keys(TxDb, Sig, DocId), clear_id_idx(TxDb, Sig, DocId), lists:foreach(fun({ViewId, NewRows, KVSize}) -> @@ -246,20 +268,6 @@ update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) -> end, KVsToAdd). 
-get_view_keys(TxDb, Sig, DocId) -> - #{ - tx := Tx, - db_prefix := DbPrefix - } = TxDb, - {Start, End} = id_idx_range(DbPrefix, Sig, DocId), - lists:map(fun({K, V}) -> - {?DB_VIEWS, ?VIEW_DATA, Sig, ?VIEW_ID_RANGE, DocId, ViewId} = - erlfdb_tuple:unpack(K, DbPrefix), - [TotalKeys, TotalSize, UniqueKeys] = couch_views_encoding:decode(V), - {ViewId, TotalKeys, TotalSize, UniqueKeys} - end, erlfdb:get_range(Tx, Start, End, [])). - - update_row_count(TxDb, Sig, ViewId, Increment) -> #{ tx := Tx, diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl index b787574d0..47860051f 100644 --- a/src/couch_views/src/couch_views_indexer.erl +++ b/src/couch_views/src/couch_views_indexer.erl @@ -310,7 +310,7 @@ map_docs(Mrst, Docs) -> {Mrst1, MappedDocs}. -write_docs(TxDb, Mrst, Docs, State) -> +write_docs(TxDb, Mrst, Docs0, State) -> #mrst{ views = Views, sig = Sig @@ -324,6 +324,10 @@ write_docs(TxDb, Mrst, Docs, State) -> KeyLimit = key_size_limit(), ValLimit = value_size_limit(), + Docs = ctrace:with_span(get_existing_keys, fun() -> + couch_views_fdb:get_existing_keys(TxDb, Sig, Docs0) + end), + lists:foreach(fun(Doc0) -> Doc1 = calculate_kv_sizes(Mrst, Doc0, KeyLimit, ValLimit), Tags = #{doc_id => maps:get(id, Doc0)}, |