diff options
author | Adam Kocoloski <kocolosk@apache.org> | 2021-03-01 21:27:55 -0500 |
---|---|---|
committer | Adam Kocoloski <kocolosk@apache.org> | 2021-03-01 21:27:55 -0500 |
commit | eb459d80aaef0a3fdbf34711fd74bcd3329d19e1 (patch) | |
tree | 5013f55fd7f5c2c4904c4b2279c80413301e0e1d | |
parent | a9e0ebe2434e4bcdc7fe156e888767e7e57c26ec (diff) | |
download | couchdb-snapshots-reads-for-indexing.tar.gz |
Relax isolation level when indexer reads from DB (branch: snapshots-reads-for-indexing)
This patch causes the indexing subsystem to use snapshot isolation when
reading from the database. This reduces commit conflicts and ensures
the index can make progress even in the case of frequently updated docs.
In the pathological case, a document updated in a fast loop can cause
the indexer to stall out entirely when using serializable reads. Each
successful update of the doc will cause the indexer to fail to commit.
The indexer will retry with a new GRV but the same target DbSeq. In the
meantime, our frequently updated document will have advanced beyond
DbSeq and so the indexer will finish without indexing it in that pass.
This process can be repeated ad infinitum and the document will never
actually show up in a view response.
Snapshot reads are safe for this use case precisely because we do have
the _changes feed, and we can always be assured that a concurrent doc
update will show up again later in the feed.
-rw-r--r-- | src/couch_views/src/couch_views_indexer.erl | 14 |
1 file changed, 8 insertions, 6 deletions
diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl
index e3b2ad5bc..e3c1ebaeb 100644
--- a/src/couch_views/src/couch_views_indexer.erl
+++ b/src/couch_views/src/couch_views_indexer.erl
@@ -207,10 +207,12 @@ do_update(Db, Mrst0, State0) ->
             tx := Tx
         } = TxDb,
 
+        Snapshot = TxDb#{ tx := erlfdb:snapshot(Tx) },
+
         State1 = get_update_start_state(TxDb, Mrst0, State0),
         Mrst1 = couch_views_trees:open(TxDb, Mrst0),
 
-        {ok, State2} = fold_changes(State1),
+        {ok, State2} = fold_changes(Snapshot, State1),
 
         #{
             doc_acc := DocAcc,
@@ -220,7 +222,7 @@ do_update(Db, Mrst0, State0) ->
             design_opts := DesignOpts
         } = State2,
 
-        DocAcc1 = fetch_docs(TxDb, DesignOpts, DocAcc),
+        DocAcc1 = fetch_docs(Snapshot, DesignOpts, DocAcc),
 
         {Mrst2, MappedDocs} = map_docs(Mrst0, DocAcc1),
         TotalKVs = write_docs(TxDb, Mrst1, MappedDocs, State2),
@@ -294,12 +296,11 @@ get_update_start_state(TxDb, _Idx, State) ->
     }.
 
 
-fold_changes(State) ->
+fold_changes(Snapshot, State) ->
     #{
         view_seq := SinceSeq,
         db_seq := DbSeq,
-        limit := Limit,
-        tx_db := TxDb
+        limit := Limit
     } = State,
 
     FoldState = State#{
@@ -312,7 +313,8 @@ fold_changes(State) ->
         {limit, Limit},
         {restart_tx, false}
     ],
-    case fabric2_db:fold_changes(TxDb, SinceSeq, Fun, FoldState, Opts) of
+
+    case fabric2_db:fold_changes(Snapshot, SinceSeq, Fun, FoldState, Opts) of
         {ok, #{rows_processed := 0} = FinalState} when Limit > 0 ->
             % If we read zero rows with a non-zero limit
             % it means we've caught up to the DbSeq as our