diff options
author | Adam Kocoloski <kocolosk@apache.org> | 2021-03-02 17:13:52 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-03-02 17:13:52 -0500 |
commit | 650ba28bb9f09bbd7d4740f2d030b24d5169401c (patch) | |
tree | f9f935ce546103bcac06f2ce2a045cd47f3c7308 | |
parent | fac13a3ff0cfe6502dc424a9a129d7dd265290e4 (diff) | |
download | couchdb-650ba28bb9f09bbd7d4740f2d030b24d5169401c.tar.gz |
Relax isolation level when indexer reads from DB (#3393)
* Relax isolation level when indexer reads from DB
This patch causes the indexing subsystem to use snapshot isolation when
reading from the database. This reduces commit conflicts and ensures
the index can make progress even in the case of frequently updated docs.
In the pathological case, a document updated in a fast loop can cause
the indexer to stall out entirely when using serializable reads. Each
successful update of the doc will cause the indexer to fail to commit.
The indexer will retry with a new GRV (FoundationDB "get read version") but the same target DbSeq. In the
meantime, our frequently updated document will have advanced beyond
DbSeq and so the indexer will finish without indexing it in that pass.
This process can be repeated ad infinitum and the document will never
actually show up in a view response.
Snapshot reads are safe for this use case precisely because we do have
the _changes feed, and we can always be assured that a concurrent doc
update will show up again later in the feed.
* Bump erlfdb version
Needed to pull in fix for snapshot range reads.
-rw-r--r-- | rebar.config.script | 2 | ||||
-rw-r--r-- | src/couch_views/src/couch_views_indexer.erl | 14 |
2 files changed, 9 insertions, 7 deletions
diff --git a/rebar.config.script b/rebar.config.script
index 4abdcc9a3..9993cb1ab 100644
--- a/rebar.config.script
+++ b/rebar.config.script
@@ -153,7 +153,7 @@ DepDescs = [
 %% Independent Apps
 {config, "config", {tag, "2.1.8"}},
 {b64url, "b64url", {tag, "1.0.2"}},
-{erlfdb, "erlfdb", {tag, "v1.2.3"}},
+{erlfdb, "erlfdb", {tag, "v1.2.5"}},
 {ets_lru, "ets-lru", {tag, "1.1.0"}},
 {khash, "khash", {tag, "1.1.0"}},
 {snappy, "snappy", {tag, "CouchDB-1.0.4"}},
diff --git a/src/couch_views/src/couch_views_indexer.erl b/src/couch_views/src/couch_views_indexer.erl
index 8019d9f6a..5a88cc606 100644
--- a/src/couch_views/src/couch_views_indexer.erl
+++ b/src/couch_views/src/couch_views_indexer.erl
@@ -209,10 +209,12 @@ do_update(Db, Mrst0, State0) ->
 tx := Tx
 } = TxDb,
+ Snapshot = TxDb#{ tx := erlfdb:snapshot(Tx) },
+
 State1 = get_update_start_state(TxDb, Mrst0, State0),
 Mrst1 = couch_views_trees:open(TxDb, Mrst0),
- {ok, State2} = fold_changes(State1),
+ {ok, State2} = fold_changes(Snapshot, State1),
 #{
 doc_acc := DocAcc,
@@ -222,7 +224,7 @@ do_update(Db, Mrst0, State0) ->
 design_opts := DesignOpts
 } = State2,
- DocAcc1 = fetch_docs(TxDb, DesignOpts, DocAcc),
+ DocAcc1 = fetch_docs(Snapshot, DesignOpts, DocAcc),
 {Mrst2, MappedDocs} = map_docs(Mrst0, DocAcc1),
 TotalKVs = write_docs(TxDb, Mrst1, MappedDocs, State2),
@@ -296,12 +298,11 @@ get_update_start_state(TxDb, _Idx, State) ->
 }.
-fold_changes(State) ->
+fold_changes(Snapshot, State) ->
 #{
 view_seq := SinceSeq,
 db_seq := DbSeq,
- limit := Limit,
- tx_db := TxDb
+ limit := Limit
 } = State,
 FoldState = State#{
@@ -314,7 +315,8 @@ fold_changes(State) ->
 {limit, Limit},
 {restart_tx, false}
 ],
- case fabric2_db:fold_changes(TxDb, SinceSeq, Fun, FoldState, Opts) of
+
+ case fabric2_db:fold_changes(Snapshot, SinceSeq, Fun, FoldState, Opts) of
 {ok, #{rows_processed := 0} = FinalState} when Limit > 0 ->
 % If we read zero rows with a non-zero limit
 % it means we've caught up to the DbSeq as our |