summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Vatamaniuc <vatamane@apache.org>2017-07-29 01:50:56 -0400
committerNick Vatamaniuc <nickva@users.noreply.github.com>2017-07-31 19:41:15 -0400
commit1022c2507631cc642693a6efc615c43bc4c1617f (patch)
tree4129bd99bd72012d6a5d5c2b125b6ef4217f6323
parent0d1994e2ea4f2bd90d2b0aa0cd1e594acc84e426 (diff)
downloadcouchdb-1022c2507631cc642693a6efc615c43bc4c1617f.tar.gz
Save migrated replicator checkpoint documents immediately
Previously, if the replication id algorithm was updated, replicator would migrate checkpoint documents but keep them in memory. They would be written to their respective databases only if checkpoints need to be updated, which doesn't happen unless the source database changes. As a result it was possible for checkpoints to be lost. Here is how it could happen: 1. Checkpoints were created for the current (3) version of the replicator document. Assume the replication document contains some credentials that look like 'adm:pass', and the computed v3 replication id is "3abc...". 2. Replication id algorithm is updated to version 4. Version 4 ignores passwords, such that changing authentication from 'adm:pass' to 'adm:pass2' would not change the replication ids. 3. Server code is updated with version 4. Replicator looks for checkpoints with the new version 4, which it calculates to be "4def...". It can't find it, so it looks for v3, it finds "3abc..." and decides to migrate it. However migration only happens in memory. That is, the checkpoint document is updated but it needs a checkpoint to happen for it to be written to disk. 4. There are no changes to the source db. So no checkpoints are forced to happen. 5. User hears that the new replicator version is improved and passwords shouldn't alter the replication ids and all the checkpoints are reused. They update the replication document with their new credentials - adm:pass2. 6. The updated document with 'adm:pass2' credentials is processed by the replicator. It computes the v4 replication id - "4def...". It's the same as before since it wasn't affected by pass -> pass2 change. That replication checkpoint document is not found on either source or target. Replicator then computes v3 of the id to find the older version. However, v3 is affected by the passwords, so there it computes "3ghi..." which is different from previous v3 which was "3abc...". It cannot find it. 
It then computes v2 and checks, then v1, and eventually gives up, having found no checkpoint, and restarts the changes feed from 0 again. To fix it, update `find_replication_logs` to also write the migrated replication checkpoint documents to their respective databases as soon as it finds them.
-rw-r--r--src/couch_replicator/src/couch_replicator_scheduler_job.erl16
1 files changed, 14 insertions, 2 deletions
diff --git a/src/couch_replicator/src/couch_replicator_scheduler_job.erl b/src/couch_replicator/src/couch_replicator_scheduler_job.erl
index 88abe7c66..6a5722521 100644
--- a/src/couch_replicator/src/couch_replicator_scheduler_job.erl
+++ b/src/couch_replicator/src/couch_replicator_scheduler_job.erl
@@ -568,7 +568,7 @@ init_state(Rep) ->
{ok, SourceInfo} = couch_replicator_api_wrap:get_db_info(Source),
{ok, TargetInfo} = couch_replicator_api_wrap:get_db_info(Target),
- [SourceLog, TargetLog] = find_replication_logs([Source, Target], Rep),
+ [SourceLog, TargetLog] = find_and_migrate_logs([Source, Target], Rep),
{StartSeq0, History} = compare_replication_logs(SourceLog, TargetLog),
StartSeq1 = get_value(since_seq, Options, StartSeq0),
@@ -610,7 +610,7 @@ init_state(Rep) ->
State#rep_state{timer = start_timer(State)}.
-find_replication_logs(DbList, #rep{id = {BaseId, _}} = Rep) ->
+find_and_migrate_logs(DbList, #rep{id = {BaseId, _}} = Rep) ->
LogId = ?l2b(?LOCAL_DOC_PREFIX ++ BaseId),
fold_replication_logs(DbList, ?REP_ID_VERSION, LogId, LogId, Rep, []).
@@ -632,11 +632,23 @@ fold_replication_logs([Db | Rest] = Dbs, Vsn, LogId, NewId, Rep, Acc) ->
Rest, ?REP_ID_VERSION, NewId, NewId, Rep, [Doc | Acc]);
{ok, Doc} ->
MigratedLog = #doc{id = NewId, body = Doc#doc.body},
+ maybe_save_migrated_log(Rep, Db, MigratedLog, Doc#doc.id),
fold_replication_logs(
Rest, ?REP_ID_VERSION, NewId, NewId, Rep, [MigratedLog | Acc])
end.
+maybe_save_migrated_log(Rep, Db, #doc{} = Doc, OldId) ->
+ case get_value(use_checkpoints, Rep#rep.options, true) of
+ true ->
+ update_checkpoint(Db, Doc),
+ Msg = "Migrated replication checkpoint. Db:~p ~p -> ~p",
+ couch_log:notice(Msg, [httpdb_strip_creds(Db), OldId, Doc#doc.id]);
+ false ->
+ ok
+ end.
+
+
spawn_changes_manager(Parent, ChangesQueue, BatchSize) ->
spawn_link(fun() ->
changes_manager_loop_open(Parent, ChangesQueue, BatchSize, 1)