author     Nick Vatamaniuc <vatamane@gmail.com>    2023-05-11 14:58:24 -0400
committer  Nick Vatamaniuc <nickva@users.noreply.github.com>    2023-05-11 17:46:52 -0400
commit     85e1fa7913b5a564c1731ad86fbba294a9d9a16c (patch)
tree       7f570e29e72926dd1e81c328b08aba97fb34773d
parent     a854625d74a5b3847b99c6f536187723821d0aae (diff)
download   couchdb-85e1fa7913b5a564c1731ad86fbba294a9d9a16c.tar.gz
Speed up internal replicator
Increase the internal replicator's default batch size and batch count.

On systems with slower (remote) disks, or a slower dist protocol, the internal replicator can easily fall behind during a high rate of bulk_docs ingestion. For each batch of 100 it had to sync security properties, make an RPC call to fetch the remote target's sync checkpoint, open handles, fetch the revs diff, etc. If there were changes to sync, it would also incur the commit (fsync) delay. It makes sense to operate on slightly larger batches to increase performance. I picked 500 as that's the default for the (external) replicator. It also helps to keep replicating more than one batch once we've brought the source and target data into the page cache, so I opted to make it do at most 5 batches per job run.

A survey of other batch sizes already in use by the internal replicator:

 * Shard splitting uses a batch of 2000 [1].
 * "Seed" system dbs replication uses 1000 [2].

There is some danger in creating too large a rev list for highly conflicted documents. In that case we already have chunking for max revs [3] to keep everything under 5000 revs per batch. To be on the safe side, both values are now configurable and can be adjusted at runtime.

To validate how this affects performance, I used a simple benchmarking utility: https://gist.github.com/nickva/9a2a3665702a876ec06d3d720aa19b0a

With defaults:

```
fabric_bench:go().
...
*** DB fabric-bench-1683835787725432000 [{q,4},{n,3}] created. Inserting 100000 docs
 * Add 100000 docs small, bs=1000 (Hz): 420
--- mem3_sync backlog: 76992
--- mem3_sync backlog: 82792
--- mem3_sync backlog: 107592
... snipped a few minutes of waiting for backlog to clear ...
--- mem3_sync backlog: 1500
--- mem3_sync backlog: 0
...
ok
```

With this PR:

```
(node1@127.0.0.1)3> fabric_bench:go().
...
*** DB fabric-bench-1683834758071419000 [{q,4},{n,3}] created. Inserting 100000 docs
 * Add 100000 docs small, bs=1000 (Hz): 600
--- mem3_sync backlog: 0
...
ok
```

The 100000-doc insertion rate improved from 420 docs/sec to 600, with no minutes-long sync backlog left over.

[1] https://github.com/apache/couchdb/blob/a854625d74a5b3847b99c6f536187723821d0aae/src/mem3/src/mem3_reshard_job.erl#L52
[2] https://github.com/apache/couchdb/blob/a854625d74a5b3847b99c6f536187723821d0aae/src/mem3/src/mem3_rpc.erl#L181
[3] https://github.com/apache/couchdb/blob/a854625d74a5b3847b99c6f536187723821d0aae/src/mem3/src/mem3_rep.erl#L609
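Both new settings read from the existing `[mem3]` config section, as the diff below shows. As a rough sketch only (not part of this commit, and assuming the standard CouchDB `config` application API available in a remsh session), they could be tuned at runtime along these lines; the values 1000 and 10 are arbitrary illustrations:

```erlang
%% Illustrative remsh sketch (not from this commit): raise internal
%% replicator batching at runtime. Config values are stored as strings;
%% mem3_sync reads them back with config:get_integer/3.
config:set("mem3", "sync_batch_size", "1000").
config:set("mem3", "sync_batch_count", "10").
```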
-rw-r--r--  src/mem3/src/mem3_sync.erl  |  15
1 file changed, 11 insertions(+), 4 deletions(-)
diff --git a/src/mem3/src/mem3_sync.erl b/src/mem3/src/mem3_sync.erl
index 179435965..f6997860d 100644
--- a/src/mem3/src/mem3_sync.erl
+++ b/src/mem3/src/mem3_sync.erl
@@ -45,10 +45,14 @@
-include_lib("mem3/include/mem3.hrl").
-include_lib("couch/include/couch_db.hrl").
+-define(DEFAULT_CONCURRENCY, 10).
+-define(DEFAULT_BATCH_SIZE, 500).
+-define(DEFAULT_BATCH_COUNT, 5).
+
-record(state, {
active = [],
count = 0,
- limit,
+ limit = ?DEFAULT_CONCURRENCY,
dict = dict:new(),
waiting = queue:new()
}).
@@ -87,10 +91,10 @@ remove_shard(Shard) ->
 
 init([]) ->
     process_flag(trap_exit, true),
-    Concurrency = config:get("mem3", "sync_concurrency", "10"),
+    Concurrency = config:get_integer("mem3", "sync_concurrency", ?DEFAULT_CONCURRENCY),
     gen_event:add_handler(mem3_events, mem3_sync_event, []),
     initial_sync(),
-    {ok, #state{limit = list_to_integer(Concurrency)}}.
+    {ok, #state{limit = Concurrency}}.
 
 handle_call({push, Job}, From, State) ->
     handle_cast({push, Job#job{pid = From}}, State);
@@ -236,7 +240,10 @@ start_push_replication(#job{name = Name, node = Node, pid = From}) ->
         true -> ok
     end,
     spawn_link(fun() ->
-        case mem3_rep:go(Name, maybe_redirect(Node)) of
+        BatchSize = config:get_integer("mem3", "sync_batch_size", ?DEFAULT_BATCH_SIZE),
+        BatchCount = config:get_integer("mem3", "sync_batch_count", ?DEFAULT_BATCH_COUNT),
+        Opts = [{batch_size, BatchSize}, {batch_count, BatchCount}],
+        case mem3_rep:go(Name, maybe_redirect(Node), Opts) of
             {ok, Pending} when Pending > 0 ->
                 exit({pending_changes, Pending});
             _ ->