summaryrefslogtreecommitdiff
path: root/src/mongo/tla_plus
diff options
context:
space:
mode:
Diffstat (limited to 'src/mongo/tla_plus')
-rw-r--r--src/mongo/tla_plus/.gitignore3
-rw-r--r--src/mongo/tla_plus/MongoReplReconfig/MCMongoReplReconfig.cfg34
-rw-r--r--src/mongo/tla_plus/MongoReplReconfig/MCMongoReplReconfig.tla19
-rw-r--r--src/mongo/tla_plus/MongoReplReconfig/MongoReplReconfig.tla492
-rw-r--r--src/mongo/tla_plus/MultiTenantMigrations/MCMultiTenantMigrations.cfg60
-rw-r--r--src/mongo/tla_plus/MultiTenantMigrations/MCMultiTenantMigrations.tla15
-rw-r--r--src/mongo/tla_plus/MultiTenantMigrations/MultiTenantMigrations.tla497
-rw-r--r--src/mongo/tla_plus/README.md23
-rw-r--r--src/mongo/tla_plus/RaftMongo/MCRaftMongo.cfg30
-rw-r--r--src/mongo/tla_plus/RaftMongo/MCRaftMongo.tla13
-rw-r--r--src/mongo/tla_plus/RaftMongo/RaftMongo.tla331
-rw-r--r--src/mongo/tla_plus/RaftMongoWithRaftReconfig.tla263
-rwxr-xr-xsrc/mongo/tla_plus/download-tlc.sh6
-rwxr-xr-xsrc/mongo/tla_plus/model-check.sh40
14 files changed, 1826 insertions, 0 deletions
diff --git a/src/mongo/tla_plus/.gitignore b/src/mongo/tla_plus/.gitignore
new file mode 100644
index 00000000000..a9dcc645fd5
--- /dev/null
+++ b/src/mongo/tla_plus/.gitignore
@@ -0,0 +1,3 @@
+**/*.toolbox
+**/states/
+tla2tools.jar
diff --git a/src/mongo/tla_plus/MongoReplReconfig/MCMongoReplReconfig.cfg b/src/mongo/tla_plus/MongoReplReconfig/MCMongoReplReconfig.cfg
new file mode 100644
index 00000000000..beb09332519
--- /dev/null
+++ b/src/mongo/tla_plus/MongoReplReconfig/MCMongoReplReconfig.cfg
@@ -0,0 +1,34 @@
+\* Config file to run the TLC model-checker on MongoReplReconfig.tla.
+\* See MongoReplReconfig.tla for instructions.
+
+SPECIFICATION Spec
+CONSTANTS
+Leader = Leader
+Follower = Follower
+Down = Down
+
+\* The set of server IDs. Adjust to test different replica set sizes.
+CONSTANT Server = {n1, n2, n3}
+
+CONSTANTS
+\* The longest oplog any server can reach during model-checking.
+MaxLogLen = 2
+
+\* The number of election terms to simulate during model-checking.
+MaxTerm = 3
+
+\* The number of reconfigs allowed during model-checking.
+MaxConfigVersion = 3
+
+\* The number of commit points advanced during model-checking.
+MaxCommittedEntries = 3
+
+\* Constrain the model to be finite and leverage the symmetry to reduce
+\* the state space.
+\* Symmetry checking may invalidate liveness checking in certain cases.
+SYMMETRY ServerSymmetry
+CONSTRAINT StateConstraint
+
+\* Invariants and properties checked by TLC.
+INVARIANT ElectionSafety
+PROPERTY NeverRollbackCommitted
diff --git a/src/mongo/tla_plus/MongoReplReconfig/MCMongoReplReconfig.tla b/src/mongo/tla_plus/MongoReplReconfig/MCMongoReplReconfig.tla
new file mode 100644
index 00000000000..5ebe4859678
--- /dev/null
+++ b/src/mongo/tla_plus/MongoReplReconfig/MCMongoReplReconfig.tla
@@ -0,0 +1,19 @@
+---- MODULE MCMongoReplReconfig ----
+\* This module defines MCMongoReplReconfig.tla constants/constraints for model-checking.
+
+EXTENDS MongoReplReconfig
+
+(**************************************************************************************************)
+(* State Constraint. Used for model checking only. *)
+(**************************************************************************************************)
+
+CONSTANTS MaxTerm, MaxLogLen, MaxConfigVersion, MaxCommittedEntries
+
+StateConstraint == \A s \in Server :
+ /\ currentTerm[s] <= MaxTerm
+ /\ Len(log[s]) <= MaxLogLen
+ /\ configVersion[s] <= MaxConfigVersion
+ /\ Cardinality(immediatelyCommitted) <= MaxCommittedEntries
+
+ServerSymmetry == Permutations(Server)
+=============================================================================
diff --git a/src/mongo/tla_plus/MongoReplReconfig/MongoReplReconfig.tla b/src/mongo/tla_plus/MongoReplReconfig/MongoReplReconfig.tla
new file mode 100644
index 00000000000..fe3061827be
--- /dev/null
+++ b/src/mongo/tla_plus/MongoReplReconfig/MongoReplReconfig.tla
@@ -0,0 +1,492 @@
+\* Copyright 2019 MongoDB, Inc.
+\*
+\* This work is licensed under:
+\* - Creative Commons Attribution-3.0 United States License
+\* http://creativecommons.org/licenses/by/3.0/us/
+
+----------------------------- MODULE MongoReplReconfig -----------------------------
+\*
+\* A specification of reconfiguration in the MongoDB replication protocol.
+\* This spec only allows single node changes.
+\*
+
+
+EXTENDS Integers, FiniteSets, Sequences, TLC
+
+\* The set of server IDs.
+CONSTANTS Server
+
+\* Server states.
+CONSTANTS Leader, Follower, Down
+
+(**************************************************************************************************)
+(* Global variables *)
+(**************************************************************************************************)
+
+\* Set of all immediately committed entries.
+\* Each element of the set is a record e.g. [index |-> ..., term |-> ..., configVersion |-> ...]
+\* This set does not include "prefix committed" entries.
+VARIABLE immediatelyCommitted
+
+(**************************************************************************************************)
+(* Per server variables. *)
+(* *)
+(* These are all functions with domain Server. *)
+(**************************************************************************************************)
+
+\* The server's term number.
+VARIABLE currentTerm
+
+\* The server's state (Leader, Follower or Down).
+VARIABLE state
+
+serverVars == <<currentTerm, state>>
+
+\* A sequence of log entries. The index into this sequence is the index of the log entry.
+VARIABLE log
+
+\*
+\* Reconfig related variables.
+\*
+
+\* A server's current config. A config is just a set of servers, i.e. an element of SUBSET Server.
+VARIABLE config
+
+\* The config version of a node's current config.
+VARIABLE configVersion
+
+\* The term in which the current config on a node was written in i.e. the term of the primary
+\* that moved to that config.
+VARIABLE configTerm
+
+configVars == <<config, configVersion, configTerm>>
+
+vars == <<serverVars, log, immediatelyCommitted, config, configVersion, configTerm>>
+
+-------------------------------------------------------------------------------------------
+
+(**************************************************************************************************)
+(* Generic helper operators *)
+(**************************************************************************************************)
+
+\* The set of all quorums of a given set.
+Quorums(S) == {i \in SUBSET(S) : Cardinality(i) * 2 > Cardinality(S)}
+
+\* Return the minimum value from a set, or undefined if the set is empty.
+Min(s) == CHOOSE x \in s : \A y \in s : x <= y
+
+\* Return the maximum value from a set, or undefined if the set is empty.
+Max(s) == CHOOSE x \in s : \A y \in s : x >= y
+
+\* Return the range of a given function.
+Range(f) == {f[x] : x \in DOMAIN f}
+
+\* Is a sequence empty.
+Empty(s) == Len(s) = 0
+
+\* Alive nodes in a set.
+AliveNodes(s) == { n \in s : state[n] # Down }
+
+-------------------------------------------------------------------------------------------
+
+(******************************************************************************)
+(* Next state actions. *)
+(* *)
+(* This section defines the core steps of the algorithm, along with some *)
+(* related helper definitions/operators. We annotate the main actions with *)
+(* an [ACTION] specifier to distinguish them from auxiliary, helper operators.*)
+(******************************************************************************)
+
+\* The term of the last entry in a log, or 0 if the log is empty.
+LastTerm(xlog) == IF Len(xlog) = 0 THEN 0 ELSE xlog[Len(xlog)].term
+GetTerm(xlog, index) == IF index = 0 THEN 0 ELSE xlog[index].term
+LogTerm(i, index) == GetTerm(log[i], index)
+
+\* Is it possible for log 'i' to roll back against log 'j'.
+\* If this is true, it implies that log 'i' should remove entries from the end of its log.
+CanRollback(i, j) ==
+ /\ Len(log[i]) > 0
+ /\ \* The log with later term is more up-to-date.
+ LastTerm(log[i]) < LastTerm(log[j])
+ /\ \/ Len(log[i]) > Len(log[j])
+ \/ /\ Len(log[i]) <= Len(log[j])
+ /\ LastTerm(log[i]) /= LogTerm(j, Len(log[i]))
+
+\* Is the config of node i considered 'newer' than the config of node j. This is the condition for
+\* node j to accept the config of node i.
+HasSameConfig(i, j) ==
+ /\ configTerm[i] = configTerm[j]
+ /\ configVersion[i] = configVersion[j]
+
+\* Exchange terms between two nodes and step down the primary if needed.
+UpdateTerms(i, j) ==
+ \* Update terms of sender and receiver i.e. to simulate an RPC request and response (heartbeat).
+ /\ currentTerm' = [currentTerm EXCEPT ![i] = Max({currentTerm[i], currentTerm[j]}),
+ ![j] = Max({currentTerm[i], currentTerm[j]})]
+ \* May update state of sender or receiver.
+ /\ state' = [state EXCEPT ![j] = IF currentTerm[j] < currentTerm[i] THEN Follower ELSE state[j],
+ ![i] = IF currentTerm[i] < currentTerm[j] THEN Follower ELSE state[i] ]
+
+UpdateTermsOnNodes(i, j) == /\ UpdateTerms(i, j)
+ /\ UNCHANGED <<log, immediatelyCommitted, configVars>>
+(******************************************************************************)
+(* [ACTION] *)
+(* *)
+(* Node 'i' rolls back against the log of node 'j'. *)
+(******************************************************************************)
+RollbackEntries(i, j) ==
+ /\ CanRollback(i, j)
+ /\ j \in config[i]
+ \* Roll back one log entry.
+ /\ log' = [log EXCEPT ![i] = SubSeq(log[i], 1, Len(log[i])-1)]
+ /\ UNCHANGED <<immediatelyCommitted, configVars, serverVars>>
+
+(******************************************************************************)
+(* [ACTION] *)
+(* *)
+(* Node 'i' gets a new log entry from node 'j'. *)
+(******************************************************************************)
+GetEntry(i, j) ==
+ /\ j \in config[i]
+ /\ state[i] = Follower
+ \* Node j must have more entries than node i.
+ /\ Len(log[j]) > Len(log[i])
+ \* Ensure that the entry at the last index of node i's log must match the entry at
+ \* the same index in node j's log. If the log of node i is empty, then the check
+ \* trivially passes. This is the essential 'log consistency check'.
+ /\ LET logOk == IF Empty(log[i]) THEN TRUE
+ ELSE LogTerm(j, Len(log[i])) = LastTerm(log[i]) IN
+ /\ logOk \* log consistency check
+ /\ LET newEntryIndex == Len(log[i]) + 1
+ newEntry == log[j][newEntryIndex]
+ newLog == Append(log[i], newEntry) IN
+ /\ log' = [log EXCEPT ![i] = newLog]
+ /\ UpdateTerms(i, j)
+ /\ UNCHANGED <<immediatelyCommitted, configVars>>
+
+
+\* Check whether the entry at "index" on "primary" is committed in the primary's current config.
+IsCommitted(index, primary) ==
+ \* The entry was written by this leader.
+ /\ LogTerm(primary, index) = currentTerm[primary]
+ /\ \E quorum \in Quorums(config[primary]):
+ \* all nodes have this log entry and are in the term of the leader.
+ \A s \in quorum:
+ /\ Len(log[s]) >= index
+ /\ log[s][index] = log[primary][index] \* they have the entry.
+ /\ currentTerm[s] = currentTerm[primary] \* they are in the same term.
+
+(******************************************************************************)
+(* [ACTION] *)
+(* *)
+(* A leader i commits its newest log entry. It commits it according to *)
+(* its own config's notion of a quorum. *)
+(******************************************************************************)
+CommitEntry(i) ==
+ \* Must have some entries to commit.
+ /\ ~Empty(log[i])
+ \* This node is leader.
+ /\ state[i] = Leader
+ /\ IsCommitted(Len(log[i]), i)
+ /\ immediatelyCommitted' = immediatelyCommitted \cup
+ {[index |-> Len(log[i]),
+ term |-> currentTerm[i],
+ configVersion |-> configVersion[i]]}
+ /\ UNCHANGED <<serverVars, log, configVars>>
+
+\* Can node 'voter' currently cast a vote for node 'candidate' in term 'term'.
+CanVoteFor(voter, candidate, term) ==
+ LET logOk ==
+ \/ LastTerm(log[candidate]) > LastTerm(log[voter])
+ \/ /\ LastTerm(log[candidate]) = LastTerm(log[voter])
+ /\ Len(log[candidate]) >= Len(log[voter]) IN
+ \* Nodes can only vote once per term, and they will never
+ \* vote for someone with a lesser term than their own.
+ /\ currentTerm[voter] < term
+ \* Only vote for someone if their config is the same as the vote's.
+ /\ HasSameConfig(candidate, voter)
+ /\ logOk
+
+(******************************************************************************)
+(* [ACTION] *)
+(* *)
+(* Node 'i' automatically becomes a leader, if eligible. *)
+(******************************************************************************)
+BecomeLeader(i) ==
+ \* Primaries make decisions based on their current configuration.
+ LET newTerm == currentTerm[i] + 1 IN
+ \E voteQuorum \in Quorums(config[i]) :
+ /\ i \in config[i] \* only become a leader if you are a part of your config.
+ /\ i \in voteQuorum \* The new leader should vote for itself.
+ /\ \A v \in voteQuorum : CanVoteFor(v, i, newTerm)
+ \* Update the terms of each voter.
+ /\ currentTerm' = [s \in Server |-> IF s \in voteQuorum THEN newTerm ELSE currentTerm[s]]
+ /\ state' = [s \in Server |->
+ IF s = i THEN Leader
+ \* All voters should revert to secondary state.
+ ELSE IF s \in voteQuorum THEN Follower
+ ELSE state[s]]
+ \* Update config's term on step-up.
+ /\ configTerm' = [configTerm EXCEPT ![i] = newTerm]
+ /\ UNCHANGED <<log, config, configVersion, immediatelyCommitted>>
+
+
+\* A quorum of nodes have received this config.
+\* With bumping the config term on step-up, this effectively gathers the votes from
+\* nodes, so that nodes in earlier configs cannot win elections.
+ConfigQuorumCheck(self, s) == /\ configVersion[self] = configVersion[s]
+ /\ configTerm[self] = configTerm[s]
+
+\* Was an op was committed in the current config of primary.
+\*
+\* Before a primary can consider a config "safe", it must make sure that any entries that were
+\* acknowledged as "committed" in a prior config are now "committed" in the current config.
+\* If moving from Ci -> Cj, it can do this by checking that the latest entry committed in Ci
+\* is now committed in Cj.
+OpCommittedInConfig(primary) ==
+ \* The primary has at least committed one entry in its term if there's any entry committed
+ \* in earlier terms.
+ /\ \/ immediatelyCommitted = {}
+ \/ \E e \in immediatelyCommitted: IsCommitted(e.index, primary)
+ \* All entries committed in the primary's term have been committed in the current config.
+ /\ \A e \in immediatelyCommitted:
+ e.term = currentTerm[primary] => IsCommitted(e.index, primary)
+
+\* Has the node talked to a quorum as primary?
+TermQuorumCheck(self, s) == currentTerm[self] >= currentTerm[s]
+
+\* Is the config on node i currently "safe".
+ConfigIsSafe(i) ==
+ /\ \E q \in Quorums(config[i]):
+ \A s \in q : /\ TermQuorumCheck(i, s)
+ /\ ConfigQuorumCheck(i, s)
+ /\ OpCommittedInConfig(i)
+
+(******************************************************************************)
+(* [ACTION] *)
+(* *)
+(* A reconfig occurs on node i. The node must currently be a leader. *)
+(******************************************************************************)
+Reconfig(i) ==
+ /\ state[i] = Leader
+ \* Only allow a new config to be installed if the current config is "safe".
+ /\ ConfigIsSafe(i)
+ \* Pick some arbitrary subset of servers to reconfig to.
+ \* Make sure to include this node in the new config, though.
+ /\ \E newConfig \in SUBSET Server :
+ \* Add or remove a single node.
+ /\ \/ \E n \in newConfig : newConfig \ {n} = config[i] \* add 1.
+ \/ \E n \in config[i] : config[i] \ {n} = newConfig \* remove 1.
+ /\ i \in newConfig
+ \* Require that at least a quorum of nodes in the new config are not down.
+ /\ AliveNodes(newConfig) \in Quorums(newConfig)
+ \* The config on this node takes effect immediately
+ /\ config' = [config EXCEPT ![i] = newConfig]
+ \* Record the term of the primary that wrote this config.
+ /\ configTerm' = [configTerm EXCEPT ![i] = currentTerm[i]]
+ \* Increment the local config version. Here we do not assume that config versions
+ \* are globally unique.
+ /\ configVersion' = [configVersion EXCEPT ![i] = @ + 1]
+ /\ UNCHANGED <<serverVars, log, immediatelyCommitted>>
+
+\* [ACTION]
+\* Node i sends its current config to node j. It is only accepted if the config is newer.
+SendConfig(sender, receiver) ==
+ \* Only update config if the received config is newer and its term is >= than your current term.
+ /\ \/ configTerm[sender] > configTerm[receiver]
+ \/ /\ configTerm[sender] = configTerm[receiver]
+ /\ configVersion[sender] > configVersion[receiver]
+ \* A node can learn the config even if it has voted for a higher term.
+ \* This diverges from Raft but is safe because bumping config term on stepup
+ \* and ConfigQuorumCheck serve the election of the config consensus together with
+ \* the data consensus election. They guarantee that old configs cannot elect a primary
+ \* once ConfigIsSafe=True, no matter whether the old configs propagate to voters of
+ \* higher terms.
+ \* /\ configTerm[sender] >= currentTerm[receiver]
+ /\ config' = [config EXCEPT ![receiver] = config[sender]]
+ /\ configVersion' = [configVersion EXCEPT ![receiver] = configVersion[sender]]
+ /\ configTerm' = [configTerm EXCEPT ![receiver] = configTerm[sender]]
+ /\ UpdateTerms(sender, receiver)
+ /\ UNCHANGED <<log, immediatelyCommitted>>
+
+(******************************************************************************)
+(* [ACTION] *)
+(* *)
+(* Shut down a node. Shutdown is used to check liveness properties. *)
+(******************************************************************************)
+ShutDown(i) ==
+ /\ state[i] # Down
+ /\ \A s \in Server:
+ /\ s \in config[s] \* The node isn't removed.
+ \* This spec assumes there is never a majority of an active config down, for
+ \* the sake of checking liveness properties so long as that condition holds.
+ /\ { n \in config[s]: state[n] # Down } \ {i} \in Quorums(config[s])
+ /\ state' = [state EXCEPT ![i] = Down]
+ /\ UNCHANGED <<currentTerm, immediatelyCommitted, log, configVars>>
+
+(******************************************************************************)
+(* [ACTION] *)
+(* *)
+(* Node 'i', a primary, handles a new client request and places the entry in *)
+(* its log. *)
+(******************************************************************************)
+ClientRequest(i) ==
+ /\ state[i] = Leader
+ /\ log' = [log EXCEPT ![i] = Append(@, [term |-> currentTerm[i]])]
+ /\ UNCHANGED <<serverVars, immediatelyCommitted, configVars>>
+
+-------------------------------------------------------------------------------------------
+
+(**************************************************************************************************)
+(* Miscellaneous properties for exploring/understanding the spec. *)
+(**************************************************************************************************)
+
+\* Are there two primaries in the current state.
+TwoPrimaries == \E s, t \in Server : s # t /\ state[s] = Leader /\ state[s] = state[t]
+
+NPrimaries(n) ==
+ \E prims \in SUBSET Server :
+ /\ \A s \in prims : state[s] = Leader
+ /\ Cardinality(prims) = n
+
+\* Are there 'n' concurrent, differing configs active on some set of nodes in
+\* the current state.
+NConcurrentConfigs(n) ==
+ \E S \in SUBSET Server :
+ /\ Cardinality(S) = n
+ /\ \A x, y \in S : x # y => config[x] # config[y]
+
+\* The set of all currently installed configs in the system.
+InstalledConfigs == Range(config)
+
+\* Do all quorums of set x and set y share at least one overlapping node.
+QuorumsOverlap(x, y) == \A qx \in Quorums(x), qy \in Quorums(y) : qx \cap qy # {}
+
+\* Is a given config "active" i.e. can it form a quorum.
+ActiveConfig(cfg) == \E Q \in Quorums(cfg) : \A s \in Q : config[s] = cfg
+
+\* The set of all active configs.
+ActiveConfigs == {c \in InstalledConfigs : ActiveConfig(c)}
+
+\* For all installed configs, do their quorums overlap.
+InstalledConfigsOverlap == \A x,y \in InstalledConfigs : QuorumsOverlap(x, y)
+
+\* For all active configs, do their quorums overlap.
+ActiveConfigsOverlap == \A x,y \in ActiveConfigs : QuorumsOverlap(x, y)
+
+\* Property asserting that there is never more than 1 active config at a time.
+AtMostOneActiveConfig == Cardinality(ActiveConfigs) <= 1
+
+(**************************************************************************************************)
+(* Correctness Properties *)
+(**************************************************************************************************)
+
+\* The set of all log entries in a given log i.e. the set of all <<index, term>>
+\* pairs that appear in the log.
+LogEntries(xlog) == {<<i, xlog[i].term>> : i \in DOMAIN xlog}
+
+\* Is <<index, term>> in the given log.
+EntryInLog(xlog, index, term) == <<index, term>> \in LogEntries(xlog)
+
+\* Is 'xlog' a prefix of 'ylog'.
+IsPrefix(xlog, ylog) ==
+ /\ Len(xlog) <= Len(ylog)
+ /\ xlog = SubSeq(ylog, 1, Len(xlog))
+
+TwoPrimariesInSameTerm ==
+ \E i, j \in Server :
+ /\ i # j
+ /\ currentTerm[i] = currentTerm[j]
+ /\ state[i] = Leader
+ /\ state[j] = Leader
+
+\* There should be at most one leader per term.
+ElectionSafety == ~TwoPrimariesInSameTerm
+
+ConfigVersionIncreasesWithTerm ==
+ ~(\E i, j \in Server :
+ /\ i # j
+ /\ configVersion[i] > configVersion[j]
+ /\ configTerm[i] < configTerm[j]
+ )
+
+\* Only uncommitted entries are allowed to be deleted from logs.
+RollbackCommitted == \E s \in Server :
+ \E e \in immediatelyCommitted :
+ /\ EntryInLog(log[s], e.index, e.term)
+ \* And the entry got rolled back.
+ /\ Len(log'[s]) < e.index
+
+NeverRollbackCommitted == [][~RollbackCommitted]_vars
+
+\* At any time, some node can always become a leader.
+ElectableNodeExists == \E s \in Server : ENABLED BecomeLeader(s)
+
+(**************************************************************************************************)
+(* Liveness properties *)
+(**************************************************************************************************)
+AnyNodeCanRollBack == \E s \in Server :
+ \E syncSource \in Server: ENABLED RollbackEntries(syncSource, s)
+
+ConfigEventuallyPropagates == <>(
+ \/ AnyNodeCanRollBack
+ \/ \A i, j \in Server:
+ \/ i \notin config[j]
+ \/ state[i] = Down
+ \/ configVersion[i] = configVersion[j]
+)
+
+ElectableNodeEventuallyExists == <>(
+ \/ AnyNodeCanRollBack
+ \/ \E s \in Server : state[s] = Leader)
+
+(**************************************************************************************************)
+(* Spec definition *)
+(**************************************************************************************************)
+Init ==
+ \* Server variables.
+ /\ currentTerm = [i \in Server |-> 0]
+ /\ state = [i \in Server |-> Follower]
+ /\ log = [i \in Server |-> << >>]
+ \* Reconfig variables.
+ \* Initially, all nodes start out with the same view of the config.
+ \* We allow an initial config to be any non-empty subset of the current servers.
+ /\ \E initConfig \in (SUBSET Server) :
+ /\ initConfig # {}
+ /\ config = [i \in Server |-> initConfig]
+ /\ configVersion = [i \in Server |-> 0]
+ /\ configTerm = [i \in Server |-> 0]
+ /\ immediatelyCommitted = {}
+
+BecomeLeaderAction == \E s \in AliveNodes(Server) : BecomeLeader(s)
+ClientRequestAction == \E s \in AliveNodes(Server) : ClientRequest(s)
+GetEntryAction == \E s, t \in AliveNodes(Server) : GetEntry(s, t)
+RollbackEntriesAction == \E s, t \in AliveNodes(Server) : RollbackEntries(s, t)
+ReconfigAction == \E s \in AliveNodes(Server) : Reconfig(s)
+SendConfigAction == \E s,t \in AliveNodes(Server) : SendConfig(s, t)
+CommitEntryAction == \E s \in AliveNodes(Server) : CommitEntry(s)
+ShutDownAction == \E s \in AliveNodes(Server) : ShutDown(s)
+UpdateTermsAction == \E s, t \in AliveNodes(Server) : UpdateTermsOnNodes(s, t)
+
+Next ==
+ \/ BecomeLeaderAction
+ \/ ClientRequestAction
+ \/ GetEntryAction
+ \/ RollbackEntriesAction
+ \/ ReconfigAction
+ \/ SendConfigAction
+ \/ CommitEntryAction
+ \/ ShutDownAction
+ \/ UpdateTermsAction
+
+Liveness ==
+ /\ WF_vars(BecomeLeaderAction)
+ /\ WF_vars(SendConfigAction)
+ /\ WF_vars(UpdateTermsAction)
+ /\ WF_vars(GetEntryAction)
+ /\ WF_vars(RollbackEntriesAction)
+ /\ WF_vars(CommitEntryAction)
+
+Spec == Init /\ [][Next]_vars /\ Liveness
+
+=============================================================================
diff --git a/src/mongo/tla_plus/MultiTenantMigrations/MCMultiTenantMigrations.cfg b/src/mongo/tla_plus/MultiTenantMigrations/MCMultiTenantMigrations.cfg
new file mode 100644
index 00000000000..bdf6a17c98f
--- /dev/null
+++ b/src/mongo/tla_plus/MultiTenantMigrations/MCMultiTenantMigrations.cfg
@@ -0,0 +1,60 @@
+\* Config file to run the TLC model-checker on MultiTenantMigrations.tla.
+\* See MultiTenantMigrations.tla for instructions.
+
+CONSTANTS DonorStartMigrationRequest = DonorStartMigrationRequest
+CONSTANTS DonorStartMigrationResponse = DonorStartMigrationResponse
+
+CONSTANTS RecipientSyncData1Request = RecipientSyncData1Request
+CONSTANTS RecipientSyncData1Response = RecipientSyncData1Response
+
+CONSTANTS RecipientSyncData2Request = RecipientSyncData2Request
+CONSTANTS RecipientSyncData2Response = RecipientSyncData2Response
+
+CONSTANTS DonorForgetMigrationRequest = DonorForgetMigrationRequest
+CONSTANTS DonorForgetMigrationResponse = DonorForgetMigrationResponse
+
+CONSTANTS RecipientForgetMigrationRequest = RecipientForgetMigrationRequest
+CONSTANTS RecipientForgetMigrationResponse = RecipientForgetMigrationResponse
+
+CONSTANTS RecUnstarted = RecUnstarted
+CONSTANTS RecInconsistent = RecInconsistent
+CONSTANTS RecConsistent = RecConsistent
+CONSTANTS RecLagged = RecLagged
+CONSTANTS RecReady = RecReady
+CONSTANTS RecAborted = RecAborted
+CONSTANTS RecTerminalState = RecTerminalState
+
+CONSTANTS DonUnstarted = DonUnstarted
+CONSTANTS DonDataSync = DonDataSync
+CONSTANTS DonBlocking = DonBlocking
+CONSTANTS DonCommitted = DonCommitted
+CONSTANTS DonAborted = DonAborted
+CONSTANTS DonTerminalState = DonTerminalState
+
+CONSTANTS CloudUnknown = CloudUnknown
+CONSTANTS CloudCommitted = CloudCommitted
+CONSTANTS CloudAborted = CloudAborted
+CONSTANTS CloudTerminalState = CloudTerminalState
+
+CONSTANTS MigrationNone = MigrationNone
+CONSTANTS MigrationCommitted = MigrationCommitted
+CONSTANTS MigrationAborted = MigrationAborted
+
+CONSTANTS SyncOK = SyncOK
+CONSTANTS SyncAborted = SyncAborted
+
+CONSTANT MaxRequests = 8
+
+INVARIANT StateMachinesConsistent
+
+PROPERTY MigrationEventuallyCompletes
+PROPERTY MessageBagEventuallyEmpties
+PROPERTY EachRequestHasAResponse
+
+\* Not configurable.
+CONSTRAINT StateConstraint
+SPECIFICATION Spec
+
+\* The spec can terminate without a deadlock. The liveness properties are present to ensure the
+\* termination states are correct.
+CHECK_DEADLOCK FALSE
diff --git a/src/mongo/tla_plus/MultiTenantMigrations/MCMultiTenantMigrations.tla b/src/mongo/tla_plus/MultiTenantMigrations/MCMultiTenantMigrations.tla
new file mode 100644
index 00000000000..08ac48de0b2
--- /dev/null
+++ b/src/mongo/tla_plus/MultiTenantMigrations/MCMultiTenantMigrations.tla
@@ -0,0 +1,15 @@
+---- MODULE MCMultiTenantMigrations ----
+\* This module defines MCMultiTenantMigrations.tla constants/constraints for model-checking.
+
+EXTENDS MultiTenantMigrations
+
+CONSTANT MaxRequests
+
+(**************************************************************************************************)
+(* State Constraint. Used for model checking only. *)
+(**************************************************************************************************)
+
+StateConstraint ==
+ MaxRequests > totalRequests
+
+=============================================================================
diff --git a/src/mongo/tla_plus/MultiTenantMigrations/MultiTenantMigrations.tla b/src/mongo/tla_plus/MultiTenantMigrations/MultiTenantMigrations.tla
new file mode 100644
index 00000000000..fda343da0a7
--- /dev/null
+++ b/src/mongo/tla_plus/MultiTenantMigrations/MultiTenantMigrations.tla
@@ -0,0 +1,497 @@
+\* Copyright 2020 MongoDB, Inc.
+\*
+\* This work is licensed under:
+\* - Creative Commons Attribution-3.0 United States License
+\* http://creativecommons.org/licenses/by/3.0/us/
+
+----------------------------- MODULE MultiTenantMigrations -----------------------------
+\*
+\* A specification of MongoDB's multi-tenant migrations state-machine protocol.
+\*
+\* To run the model-checker, first edit the constants in MCMultiTenantMigrations.cfg if desired,
+\* then:
+\* cd src/mongo/tla_plus
+\* ./model-check.sh MultiTenantMigrations
+\*
+
+EXTENDS Integers, FiniteSets, Sequences, TLC
+
+\* Command requests and responses
+CONSTANTS DonorStartMigrationRequest, DonorStartMigrationResponse
+CONSTANTS RecipientSyncData1Request, RecipientSyncData1Response
+CONSTANTS RecipientSyncData2Request, RecipientSyncData2Response
+CONSTANTS DonorForgetMigrationRequest, DonorForgetMigrationResponse
+CONSTANTS RecipientForgetMigrationRequest, RecipientForgetMigrationResponse
+
+\* recipient states
+\* RecUnstarted -(syncData1)-> RecInconsistent -> RecConsistent -(syncData2)-> RecLagged -> RecReady
+CONSTANTS RecUnstarted, RecInconsistent, RecConsistent, RecLagged, RecReady, RecAborted,
+ RecTerminalState
+
+\* donor states
+\* DonUnstarted -(startMigration)-> DonDataSync -(syncData1Res)
+\* -> DonBlocking -(syncData2Res)-> DonCommitted
+CONSTANTS DonUnstarted, DonDataSync, DonBlocking, DonCommitted, DonAborted, DonTerminalState
+
+\* cloud state
+CONSTANTS CloudUnknown, CloudCommitted, CloudAborted, CloudTerminalState
+
+\* Responses to DonorStartMigration request
+CONSTANTS MigrationNone, MigrationCommitted, MigrationAborted
+
+\* Responses to RecipientSyncData1/2 requests
+CONSTANTS SyncOK, SyncAborted
+
+(**************************************************************************************************)
+(* Global variables *)
+(**************************************************************************************************)
+
+VARIABLE messages
+VARIABLE recipientState
+VARIABLE donorState
+VARIABLE cloudState
+VARIABLE totalRequests
+VARIABLE totalResponses
+VARIABLE recipientAborted
+
+donorVars == <<donorState>>
+recipientVars == <<recipientState, recipientAborted>>
+cloudVars == <<cloudState>>
+messageVars == <<messages, totalRequests, totalResponses>>
+vars == <<donorVars, recipientVars, cloudVars, messageVars>>
+
+-------------------------------------------------------------------------------------------
+
+(**************************************************************************************************)
+(* Network Helpers, adapted from https://github.com/ongardie/raft.tla/blob/master/raft.tla *)
+(**************************************************************************************************)
+
+\* Helper for Send. Given a message m and bag of messages, return a new bag of messages with one
+\* more m in it.
+WithMessage(m, msgs) ==
+ IF m \in DOMAIN msgs THEN
+ [msgs EXCEPT ![m] = msgs[m] + 1]
+ ELSE
+ msgs @@ (m :> 1)
+
+\* Helper for Discard and Reply. Given a message m and bag of messages, return a new bag of
+\* messages with one less m in it.
+WithoutMessage(m, msgs) ==
+ IF m \in DOMAIN msgs THEN
+ IF msgs[m] = 1 THEN
+ \* Remove message m from the bag.
+ [n \in DOMAIN msgs \ {m} |-> msgs[n]]
+ ELSE
+ [msgs EXCEPT ![m] = msgs[m] - 1]
+ ELSE
+ msgs
+
+IsRequest(m) ==
+ m.mType \in {DonorStartMigrationRequest, RecipientSyncData1Request,
+ RecipientSyncData2Request, DonorForgetMigrationRequest,
+ RecipientForgetMigrationRequest}
+
+IncTotalMessages(m) ==
+ IF IsRequest(m) THEN
+ /\ totalRequests' = totalRequests + 1
+ /\ UNCHANGED <<totalResponses>>
+ ELSE
+ /\ totalResponses' = totalResponses + 1
+ /\ UNCHANGED <<totalRequests>>
+
+\* Add a message to the bag of messages.
+Send(m) ==
+ /\ messages' = WithMessage(m, messages)
+ /\ IncTotalMessages(m)
+
+\* Remove a message from the bag of messages. Used when a server is done processing a message.
+Discard(m) ==
+ /\ messages' = WithoutMessage(m, messages)
+ /\ UNCHANGED <<totalRequests, totalResponses>>
+
+\* Helper that both sends a message and discards a message.
+SendAndDiscard(sendMessage, discardMessage) ==
+ /\ messages' = WithoutMessage(discardMessage, WithMessage(sendMessage, messages))
+ /\ IncTotalMessages(sendMessage)
+
+(**************************************************************************************************)
+(* Request and response handlers *)
+(**************************************************************************************************)
+
+\* Helper to create the donorStartMigration response based on the donor state.
+DonorStartMigrationResponseGen ==
+ CASE donorState = DonAborted ->
+ [mType |-> DonorStartMigrationResponse,
+ mOutcome |-> MigrationAborted]
+ [] donorState = DonCommitted ->
+ [mType |-> DonorStartMigrationResponse,
+ mOutcome |-> MigrationCommitted]
+ [] donorState \in {DonUnstarted, DonDataSync, DonBlocking, DonTerminalState} ->
+ [mType |-> DonorStartMigrationResponse,
+ mOutcome |-> MigrationNone]
+
+\* Donor
+HandleDonorStartMigrationRequest(m) ==
+ /\ m.mType = DonorStartMigrationRequest
+ \* If the donor is unstarted, it starts, otherwise nothing happens. Either way sends a response
+ \* to cloud.
+ /\ CASE donorState = DonUnstarted ->
+ /\ donorState' = DonDataSync
+ /\ messages' = WithoutMessage(m,
+ \* Send an immediate response to cloud.
+ WithMessage(DonorStartMigrationResponseGen,
+ \* Send a recipientSyncData1 request to the recipient.
+ WithMessage([mType |-> RecipientSyncData1Request], messages)))
+ /\ totalRequests' = totalRequests + 1
+ /\ totalResponses' = totalResponses + 1
+ [] donorState \in {DonDataSync, DonBlocking, DonCommitted, DonAborted, DonTerminalState} ->
+ /\ SendAndDiscard(DonorStartMigrationResponseGen, m)
+ /\ UNCHANGED <<donorVars>>
+ /\ UNCHANGED <<recipientVars, cloudVars>>
+
+\* Cloud
+HandleDonorStartMigrationResponse(m) ==
+ /\ m.mType = DonorStartMigrationResponse
+ \* Updates the cloud state to whatever the donor specifies, if specified.
+ /\ CASE m.mOutcome = MigrationNone ->
+ UNCHANGED <<cloudState>>
+ [] m.mOutcome = MigrationCommitted ->
+ cloudState' = CloudCommitted
+ [] m.mOutcome = MigrationAborted ->
+ cloudState' = CloudAborted
+ /\ Discard(m)
+ /\ UNCHANGED <<donorVars, recipientVars>>
+
+\* Recipient
+HandleRecipientSyncData1Request(m) ==
+ /\ m.mType = RecipientSyncData1Request
+ \* We don't want to handle this request being processed while inconsistent, since that would
+ \* require modeling request joining behavior, which is unnecessary complexity for the
+ \* purposes of this model. A recipientSyncData1 request being processed while in RecInconsistent
+ \* must be a duplicate message.
+ /\ recipientState \notin {RecInconsistent}
+ /\ CASE recipientState = RecUnstarted ->
+ \* Starts the migration. The recipient does not respond to the donor until it is
+ \* consistent.
+ /\ recipientState' = RecInconsistent
+ /\ Discard(m)
+ /\ UNCHANGED <<recipientAborted>>
+ [] recipientState = RecAborted ->
+ \* Sends a response to the donor that the migration aborted.
+ /\ SendAndDiscard([mType |-> RecipientSyncData1Response,
+ mSyncStatus |-> SyncAborted], m)
+ /\ UNCHANGED <<recipientVars>>
+ [] recipientState \in {RecConsistent, RecLagged, RecReady} ->
+ \* This is a duplicate message, resend the response we must have sent previously.
+ /\ SendAndDiscard([mType |-> RecipientSyncData1Response,
+ mSyncStatus |-> SyncOK], m)
+ /\ UNCHANGED <<recipientVars>>
+ [] recipientState = RecTerminalState ->
+ \* This migration has finished, which means the donor and cloud already have committed
+ \* or aborted. Send SyncOK, which will be ignored.
+ /\ SendAndDiscard([mType |-> RecipientSyncData1Response,
+ mSyncStatus |-> SyncOK], m)
+ /\ UNCHANGED <<recipientVars>>
+ /\ UNCHANGED <<donorVars, cloudVars>>
+
+\* Factored out of below to make nested Case statements clearer.
+HandleRecipientSyncData1Response_SyncOK(m) ==
+ CASE donorState = DonDataSync ->
+ \* Move the state machine to "blocking" and send recipientSyncData2.
+ /\ donorState' = DonBlocking
+ /\ SendAndDiscard([mType |-> RecipientSyncData2Request], m)
+ [] donorState \in {DonBlocking, DonCommitted, DonAborted, DonTerminalState} ->
+ \* Just ignore this message, since we're past this step in the protocol
+ \* and this is a delayed message.
+ /\ Discard(m)
+ /\ UNCHANGED <<donorState>>
+
+\* Factored out of below to make nested Case statements clearer.
+HandleRecipientSyncData1Response_SyncAborted(m) ==
+ /\ CASE donorState = DonDataSync ->
+ \* The recipient failed the migration, so abort.
+ \* We can only get this response in DonDataSync or DonBlocking.
+ \* DataSync is the common case, but Blocking can happen when there are two
+ \* recipientSyncData1 responses and the "OK" one is processed first.
+ donorState' = DonAborted
+ [] donorState \in {DonBlocking, DonAborted, DonTerminalState} ->
+ \* If the migration is in DonBlocking, we ignore the response. The migration will be
+ \* aborted on the donor when it receives the recipientSyncData2 response.
+ \* If the migration is already aborted or finished, do nothing.
+ UNCHANGED <<donorState>>
+ /\ Discard(m)
+
+\* Donor
+HandleRecipientSyncData1Response(m) ==
+ /\ m.mType = RecipientSyncData1Response
+ /\ CASE m.mSyncStatus = SyncOK ->
+ HandleRecipientSyncData1Response_SyncOK(m)
+ [] m.mSyncStatus = SyncAborted ->
+ HandleRecipientSyncData1Response_SyncAborted(m)
+ /\ UNCHANGED <<recipientVars, cloudVars>>
+
+\* Recipient
+HandleRecipientSyncData2Request(m) ==
+ /\ m.mType = RecipientSyncData2Request
+ \* We don't want to handle this request being processed while lagged, since that would
+ \* require modeling request joining behavior, which is unnecessary complexity for the
+ \* purposes of this model. A recipientSyncData2 request being processed while in RecLagged
+ \* must be a duplicate message.
+ /\ recipientState \notin {RecLagged}
+ /\ CASE recipientState = RecConsistent ->
+ \* Move the state machine to "lagged", since the recipient now knows the ending
+ \* timestamp. The recipient does not respond to the donor until it has caught up.
+ /\ recipientState' = RecLagged
+ /\ Discard(m)
+ /\ UNCHANGED <<recipientAborted>>
+ [] recipientState = RecAborted ->
+ \* Sends a response to the donor that the migration aborted.
+ /\ SendAndDiscard([mType |-> RecipientSyncData2Response,
+ mSyncStatus |-> SyncAborted], m)
+ /\ UNCHANGED <<recipientVars>>
+ [] recipientState = RecReady ->
+ \* This is a duplicate message, resend the response we must have sent previously.
+ /\ SendAndDiscard([mType |-> RecipientSyncData2Response,
+ mSyncStatus |-> SyncOK], m)
+ /\ UNCHANGED <<recipientVars>>
+ [] recipientState = RecTerminalState ->
+ \* This migration is finished, which means the donor and cloud already have committed
+ \* or aborted. Send SyncOK, which will be ignored.
+ /\ SendAndDiscard([mType |-> RecipientSyncData2Response,
+ mSyncStatus |-> SyncOK], m)
+ /\ UNCHANGED <<recipientVars>>
+ /\ UNCHANGED <<donorVars, cloudVars>>
+
+\* Factored out of below to make nested Case statements clearer.
+HandleRecipientSyncData2Response_SyncOK ==
+ CASE donorState = DonBlocking ->
+ \* The recipient is done!
+ donorState' = DonCommitted
+ [] donorState \in {DonCommitted, DonAborted, DonTerminalState} ->
+ \* Just ignore this message, since we're past this step in the protocol
+ \* and this is a delayed message.
+ UNCHANGED <<donorState>>
+
+\* Factored out of below to make nested Case statements clearer.
+HandleRecipientSyncData2Response_SyncAborted ==
+ CASE donorState = DonBlocking ->
+ \* The recipient failed the migration, so abort.
+ donorState' = DonAborted
+ [] donorState \in {DonAborted, DonTerminalState} ->
+ \* If the migration is already aborted or finished, do nothing.
+ UNCHANGED <<donorState>>
+\* Donor
+HandleRecipientSyncData2Response(m) ==
+ /\ m.mType = RecipientSyncData2Response
+ /\ CASE m.mSyncStatus = SyncOK ->
+ HandleRecipientSyncData2Response_SyncOK
+ [] m.mSyncStatus = SyncAborted ->
+ HandleRecipientSyncData2Response_SyncAborted
+ /\ Discard(m)
+ /\ UNCHANGED <<recipientVars, cloudVars>>
+
+\* Donor
+HandleDonorForgetMigrationRequest(m) ==
+ /\ m.mType = DonorForgetMigrationRequest
+ \* Don't mark donor finished until recipient is.
+ /\ SendAndDiscard([mType |-> RecipientForgetMigrationRequest], m)
+ /\ UNCHANGED <<donorVars, recipientVars, cloudVars>>
+
+\* Cloud
+HandleDonorForgetMigrationResponse(m) ==
+ /\ m.mType = DonorForgetMigrationResponse
+ \* The donor and recipient unconditionally finish the migration, so cloud can too.
+ /\ cloudState' = CloudTerminalState
+ /\ Discard(m)
+ /\ UNCHANGED <<donorVars, recipientVars>>
+
+\* Recipient
+HandleRecipientForgetMigrationRequest(m) ==
+ /\ m.mType = RecipientForgetMigrationRequest
+ \* Finish the migration no matter what, and tell the donor.
+ /\ recipientState' = RecTerminalState
+ /\ SendAndDiscard([mType |-> RecipientForgetMigrationResponse], m)
+ /\ UNCHANGED <<donorVars, cloudVars, recipientAborted>>
+
+\* Donor
+HandleRecipientForgetMigrationResponse(m) ==
+ /\ m.mType = RecipientForgetMigrationResponse
+ \* The recipient has finished the migration, so now the donor can finish the migration and
+ \* respond to cloud that it has finished the migration.
+ /\ donorState' = DonTerminalState
+ /\ SendAndDiscard([mType |-> DonorForgetMigrationResponse], m)
+ /\ UNCHANGED <<recipientVars, cloudVars>>
+
+
+(******************************************************************************)
+(* [ACTION] *)
+(******************************************************************************)
+
+\* Models a retry of recipientSyncData1.
+DonorSendsRecipientSyncData1Request ==
+ /\ donorState = DonDataSync
+ /\ Send([mType |-> RecipientSyncData1Request])
+ /\ UNCHANGED <<donorVars, recipientVars, cloudVars>>
+
+\* Models a retry of recipientSyncData2.
+DonorSendsRecipientSyncData2Request ==
+ /\ donorState = DonBlocking
+ /\ Send([mType |-> RecipientSyncData2Request])
+ /\ UNCHANGED <<donorVars, recipientVars, cloudVars>>
+
+CloudSendsDonorStartMigrationRequest ==
+ /\ cloudState = CloudUnknown
+ /\ Send([mType |-> DonorStartMigrationRequest])
+ /\ UNCHANGED <<donorVars, recipientVars, cloudVars>>
+
+CloudSendsDonorForgetMigrationRequest ==
+ /\ cloudState \in {CloudAborted, CloudCommitted}
+ /\ Send([mType |-> DonorForgetMigrationRequest])
+ /\ UNCHANGED <<donorVars, recipientVars, cloudVars>>
+
+RecipientBecomesConsistent ==
+ /\ recipientState = RecInconsistent
+ /\ recipientState' = RecConsistent
+ /\ Send([mType |-> RecipientSyncData1Response,
+ mSyncStatus |-> SyncOK])
+ /\ UNCHANGED <<donorVars, cloudVars, recipientAborted>>
+
+RecipientCatchesUp ==
+ /\ recipientState = RecLagged
+ /\ recipientState' = RecReady
+ /\ Send([mType |-> RecipientSyncData2Response,
+ mSyncStatus |-> SyncOK])
+ /\ UNCHANGED <<donorVars, cloudVars, recipientAborted>>
+
+RecipientFailsMigration ==
+ \* Recipient can't fail after it's ready, finished, or already aborted.
+ /\ recipientState \notin {RecReady, RecAborted, RecTerminalState}
+ /\ recipientState' = RecAborted
+ /\ recipientAborted' = TRUE
+ /\ CASE recipientState = RecInconsistent ->
+ \* When "inconsistent" the recipient has an active recipientSyncData1 request.
+ Send([mType |-> RecipientSyncData1Response,
+ mSyncStatus |-> SyncAborted])
+ [] recipientState = RecLagged ->
+ \* When "lagged" the recipient has an active recipientSyncData2 request.
+ Send([mType |-> RecipientSyncData2Response,
+ mSyncStatus |-> SyncAborted])
+ [] recipientState \in {RecUnstarted, RecConsistent} ->
+ \* Nothing happens, besides setting the state to aborted. On transitioning to
+ \* "consistent" the recipient already sent a response to the donor and should not
+ \* send a conflicting response.
+ UNCHANGED <<messageVars>>
+ /\ UNCHANGED <<cloudVars, donorVars>>
+
+(**************************************************************************************************)
+(* Correctness Properties *)
+(**************************************************************************************************)
+
+StateMachinesInconsistent ==
+ \/ /\ cloudState = CloudCommitted
+ /\ \/ recipientState \notin {RecReady, RecTerminalState}
+ \/ recipientAborted = TRUE
+ \/ donorState \notin {DonCommitted, DonTerminalState}
+ \/ /\ donorState = DonCommitted
+ /\ \/ recipientState \notin {RecReady, RecTerminalState}
+ \/ recipientAborted = TRUE
+
+StateMachinesConsistent == ~StateMachinesInconsistent
+
+(**************************************************************************************************)
+(* Liveness properties *)
+(**************************************************************************************************)
+
+\* Checks that the state machines eventually converge on terminating states.
+MigrationEventuallyCompletes ==
+ <> /\ recipientState = RecTerminalState
+ /\ donorState = DonTerminalState
+ /\ cloudState = CloudTerminalState
+
+\* Checks that if the bag fills up, it eventually empties.
+MessageBagEventuallyEmpties ==
+ Cardinality(DOMAIN messages) > 0 ~> Cardinality(DOMAIN messages) = 0
+
+\* Checks that the number of totalRequests eventually equals the number of totalResponses,
+\* and stays that way. This will always be right before termination.
+EachRequestHasAResponse ==
+ <>[] (totalRequests = totalResponses)
+
+(**************************************************************************************************)
+(* Spec definition *)
+(**************************************************************************************************)
+Init ==
+ /\ messages = [m \in {} |-> 0]
+ /\ donorState = DonUnstarted
+ /\ recipientState = RecUnstarted
+ /\ cloudState = CloudUnknown
+ /\ totalRequests = 0
+ /\ totalResponses = 0
+ /\ recipientAborted = FALSE
+
+RecipientBecomesConsistentAction == RecipientBecomesConsistent
+RecipientCatchesUpAction == RecipientCatchesUp
+RecipientFailsMigrationAction == RecipientFailsMigration
+CloudSendsDonorStartMigrationRequestAction == CloudSendsDonorStartMigrationRequest
+CloudSendsDonorForgetMigrationRequestAction == CloudSendsDonorForgetMigrationRequest
+DonorSendsRecipientSyncData1RequestAction == DonorSendsRecipientSyncData1Request
+DonorSendsRecipientSyncData2RequestAction == DonorSendsRecipientSyncData2Request
+
+ReceiveDonorStartMigrationRequestAction == \E m \in DOMAIN messages :
+ HandleDonorStartMigrationRequest(m)
+ReceiveDonorStartMigrationResponseAction == \E m \in DOMAIN messages :
+ HandleDonorStartMigrationResponse(m)
+ReceiveRecipientSyncData1RequestAction == \E m \in DOMAIN messages :
+ HandleRecipientSyncData1Request(m)
+ReceiveRecipientSyncData1ResponseAction == \E m \in DOMAIN messages :
+ HandleRecipientSyncData1Response(m)
+ReceiveRecipientSyncData2RequestAction == \E m \in DOMAIN messages :
+ HandleRecipientSyncData2Request(m)
+ReceiveRecipientSyncData2ResponseAction == \E m \in DOMAIN messages :
+ HandleRecipientSyncData2Response(m)
+ReceiveDonorForgetMigrationRequestAction == \E m \in DOMAIN messages :
+ HandleDonorForgetMigrationRequest(m)
+ReceiveDonorForgetMigrationResponseAction == \E m \in DOMAIN messages :
+ HandleDonorForgetMigrationResponse(m)
+ReceiveRecipientForgetMigrationRequestAction == \E m \in DOMAIN messages :
+ HandleRecipientForgetMigrationRequest(m)
+ReceiveRecipientForgetMigrationResponseAction == \E m \in DOMAIN messages :
+ HandleRecipientForgetMigrationResponse(m)
+
+Next ==
+ \/ RecipientBecomesConsistentAction
+ \/ RecipientCatchesUpAction
+ \/ RecipientFailsMigrationAction
+ \/ CloudSendsDonorStartMigrationRequestAction
+ \/ CloudSendsDonorForgetMigrationRequestAction
+ \/ DonorSendsRecipientSyncData1RequestAction
+ \/ DonorSendsRecipientSyncData2RequestAction
+ \/ ReceiveDonorStartMigrationRequestAction
+ \/ ReceiveDonorStartMigrationResponseAction
+ \/ ReceiveRecipientSyncData1RequestAction
+ \/ ReceiveRecipientSyncData1ResponseAction
+ \/ ReceiveRecipientSyncData2RequestAction
+ \/ ReceiveRecipientSyncData2ResponseAction
+ \/ ReceiveDonorForgetMigrationRequestAction
+ \/ ReceiveDonorForgetMigrationResponseAction
+ \/ ReceiveRecipientForgetMigrationRequestAction
+ \/ ReceiveRecipientForgetMigrationResponseAction
+
+\* Add fairness constraints so the above liveness properties are met.
+Liveness ==
+ /\ WF_vars(ReceiveDonorStartMigrationRequestAction)
+ /\ WF_vars(ReceiveDonorStartMigrationResponseAction)
+ /\ WF_vars(ReceiveRecipientSyncData1RequestAction)
+ /\ WF_vars(ReceiveRecipientSyncData1ResponseAction)
+ /\ WF_vars(ReceiveRecipientSyncData2RequestAction)
+ /\ WF_vars(ReceiveRecipientSyncData2ResponseAction)
+ /\ WF_vars(ReceiveDonorForgetMigrationRequestAction)
+ /\ WF_vars(ReceiveDonorForgetMigrationResponseAction)
+ /\ WF_vars(ReceiveRecipientForgetMigrationRequestAction)
+ /\ WF_vars(ReceiveRecipientForgetMigrationResponseAction)
+ /\ WF_vars(CloudSendsDonorStartMigrationRequestAction)
+ /\ WF_vars(CloudSendsDonorForgetMigrationRequestAction)
+
+Spec == Init /\ [][Next]_vars /\ Liveness
+
+=============================================================================
diff --git a/src/mongo/tla_plus/README.md b/src/mongo/tla_plus/README.md
new file mode 100644
index 00000000000..04b22f406d3
--- /dev/null
+++ b/src/mongo/tla_plus/README.md
@@ -0,0 +1,23 @@
+Replication TLA+ Specifications
+===============================
+
+These are formal specifications for exploring possible replication protocols.
+Some are experiments, some reflect MongoDB's actual implementation. See the
+comments in each spec for details.
+
+Some specs are intended for model-checking. They are in subdirectories like:
+
+```
+SpecName/
+ SpecName.tla specification
+ MCSpecName.tla additional operators for model-checking
+ MCSpecName.cfg configuration for model-checking
+```
+
+Run the model-checker on a spec thus:
+
+```
+./model-check.sh SpecName
+```
+
+There may be additional instructions in the spec or config file.
diff --git a/src/mongo/tla_plus/RaftMongo/MCRaftMongo.cfg b/src/mongo/tla_plus/RaftMongo/MCRaftMongo.cfg
new file mode 100644
index 00000000000..3724efb7f0b
--- /dev/null
+++ b/src/mongo/tla_plus/RaftMongo/MCRaftMongo.cfg
@@ -0,0 +1,30 @@
+\* Config file to run the TLC model-checker on RaftMongo.tla.
+\* See RaftMongo.tla for instructions.
+
+\* The maximum number of oplog entries that can be created on the primary in one
+\* action. For model-checking, this can be 1 or a small number.
+CONSTANT MaxClientWriteSize = 2
+
+\* The number of election terms to simulate during model-checking.
+CONSTANT MaxTerm = 3
+
+\* The longest oplog any server can reach during model-checking.
+CONSTANT MaxLogLen = 3
+
+\* The set of server IDs. Adjust to test different replica set sizes.
+CONSTANT Server = {1, 2, 3}
+
+INVARIANT NoTwoPrimariesInSameTerm
+
+\* NeverRollbackCommitted and NeverRollbackBeforeCommitPoint can be violated,
+\* although it's not ultimately a safety issue: SERVER-39626. The issue
+\* requires at least 5 servers, 3 terms, and oplogs of length 4+, which are
+\* larger limits than we can easily model-check.
+INVARIANT NeverRollbackCommitted
+INVARIANT NeverRollbackBeforeCommitPoint
+
+PROPERTY CommitPointEventuallyPropagates
+
+\* Not configurable.
+CONSTRAINT StateConstraint
+SPECIFICATION Spec
diff --git a/src/mongo/tla_plus/RaftMongo/MCRaftMongo.tla b/src/mongo/tla_plus/RaftMongo/MCRaftMongo.tla
new file mode 100644
index 00000000000..057e3dd7a91
--- /dev/null
+++ b/src/mongo/tla_plus/RaftMongo/MCRaftMongo.tla
@@ -0,0 +1,13 @@
+---- MODULE MCRaftMongo ----
+\* This module defines RaftMongo.tla constants/constraints for model-checking.
+\* See RaftMongo.tla for instructions.
+
+EXTENDS RaftMongo
+
+CONSTANT MaxTerm
+CONSTANT MaxLogLen
+
+StateConstraint ==
+ /\ GlobalCurrentTerm <= MaxTerm
+ /\ \forall i \in Server: Len(log[i]) <= MaxLogLen
+=============================================================================
diff --git a/src/mongo/tla_plus/RaftMongo/RaftMongo.tla b/src/mongo/tla_plus/RaftMongo/RaftMongo.tla
new file mode 100644
index 00000000000..bb1ce3f6684
--- /dev/null
+++ b/src/mongo/tla_plus/RaftMongo/RaftMongo.tla
@@ -0,0 +1,331 @@
+\* Copyright 2019 MongoDB, Inc.
+\*
+\* This work is licensed under:
+\* - Creative Commons Attribution-3.0 United States License
+\* http://creativecommons.org/licenses/by/3.0/us/
+
+--------------------------------- MODULE RaftMongo ---------------------------------
+\* This is the formal specification for the Raft consensus algorithm in MongoDB.
+\*
+\* To run the model-checker, first edit the constants in MCRaftMongo.cfg if desired, then:
+\* cd src/mongo/tla_plus
+\* ./model-check.sh RaftMongo
+
+EXTENDS Integers, FiniteSets, Sequences
+
+\* The set of server IDs.
+CONSTANT Server
+
+\* The maximum number of oplog entries that can be created on the primary in one
+\* action. For model-checking, this can be 1 or a small number.
+CONSTANT MaxClientWriteSize
+
+\* The set of log entries that have been acknowledged as committed, both "immediately committed" and
+\* "prefix committed".
+VARIABLE committedEntries
+
+----
+\* The following variables are all per server (functions with domain Server).
+
+\* The server's term number.
+VARIABLE currentTerm
+
+\* The server's state ("Follower" or "Leader").
+VARIABLE state
+
+\* The commit point learned by each server.
+VARIABLE commitPoint
+
+electionVars == <<currentTerm, state>>
+serverVars == <<electionVars, commitPoint>>
+
+\* A Sequence of log entries. The index into this sequence is the index of the
+\* log entry. Unfortunately, the Sequence module defines Head(s) as the entry
+\* with index 1, so be careful not to use that!
+VARIABLE log
+logVars == <<committedEntries, log>>
+
+\* End of per server variables.
+----
+
+\* All variables; used for stuttering (asserting state hasn't changed).
+vars == <<serverVars, logVars>>
+
+----
+\* Helpers
+
+IsMajority(servers) == Cardinality(servers) * 2 > Cardinality(Server)
+GetTerm(xlog, index) == IF index = 0 THEN 0 ELSE xlog[index].term
+LogTerm(i, index) == GetTerm(log[i], index)
+LastTerm(xlog) == GetTerm(xlog, Len(xlog))
+Leaders == {s \in Server : state[s] = "Leader"}
+Range(f) == {f[x] : x \in DOMAIN f}
+
+\* Return the maximum value from a set, or undefined if the set is empty.
+Max(s) == CHOOSE x \in s : \A y \in s : x >= y
+
+GlobalCurrentTerm == Max(Range(currentTerm))
+
+\* Server i is allowed to sync from server j.
+CanSyncFrom(i, j) ==
+ /\ Len(log[i]) < Len(log[j])
+ /\ LastTerm(log[i]) = LogTerm(j, Len(log[i]))
+
+\* Server "me" is ahead of or caught up to server j.
+NotBehind(me, j) == \/ LastTerm(log[me]) > LastTerm(log[j])
+ \/ /\ LastTerm(log[me]) = LastTerm(log[j])
+ /\ Len(log[me]) >= Len(log[j])
+
+\* The set of nodes that has log[me][logIndex] in their oplog
+Agree(me, logIndex) ==
+ { node \in Server :
+ /\ Len(log[node]) >= logIndex
+ /\ LogTerm(me, logIndex) = LogTerm(node, logIndex) }
+
+\* Return whether Node i can learn the commit point from Node j.
+CommitPointLessThan(i, j) ==
+ \/ commitPoint[i].term < commitPoint[j].term
+ \/ /\ commitPoint[i].term = commitPoint[j].term
+ /\ commitPoint[i].index < commitPoint[j].index
+
+\* Is it possible for node i's log to roll back based on j's log? If true, it
+\* implies that i's log should remove entries to become a prefix of j's.
+CanRollbackOplog(i, j) ==
+ /\ Len(log[i]) > 0
+ /\ \* The log with later term is more up-to-date
+ LastTerm(log[i]) < LastTerm(log[j])
+ /\
+ \/ Len(log[i]) > Len(log[j])
+ \/ /\ Len(log[i]) <= Len(log[j])
+ /\ LastTerm(log[i]) /= LogTerm(j, Len(log[i]))
+
+----
+\* Define initial values for all variables
+
+InitServerVars == /\ currentTerm = [i \in Server |-> 0]
+ /\ state = [i \in Server |-> "Follower"]
+ /\ commitPoint = [i \in Server |-> [term |-> 0, index |-> 0]]
+InitLogVars == /\ log = [i \in Server |-> << >>]
+ /\ committedEntries = {}
+Init == /\ InitServerVars
+ /\ InitLogVars
+
+----
+\* Message handlers
+\* i = recipient, j = sender
+
+\* Receive one or more oplog entries from j.
+AppendOplog(i, j) ==
+ /\ CanSyncFrom(i, j)
+ /\ state[i] = "Follower"
+ /\ \E lastAppended \in (Len(log[i]) + 1)..Len(log[j]):
+ LET appendedEntries == SubSeq(log[j], Len(log[i]) + 1, lastAppended)
+ IN log' = [log EXCEPT ![i] = log[i] \o appendedEntries]
+ /\ UNCHANGED <<committedEntries, serverVars>>
+
+\* Node i learns the commit point from j via heartbeat.
+LearnCommitPoint(i, j) ==
+ /\ CommitPointLessThan(i, j)
+ /\ commitPoint' = [commitPoint EXCEPT ![i] = commitPoint[j]]
+ /\ UNCHANGED <<committedEntries, electionVars, logVars>>
+
+RollbackOplog(i, j) ==
+ /\ CanRollbackOplog(i, j)
+ \* Rollback 1 oplog entry
+ /\ LET new == [index2 \in 1..(Len(log[i]) - 1) |-> log[i][index2]]
+ IN log' = [log EXCEPT ![i] = new]
+ /\ UNCHANGED <<serverVars, committedEntries>>
+
+\* ACTION
+\* Node i is elected by a majority, and nodes that voted for it can't still be primary.
+\* A stale primary might persist among the minority that didn't vote for it.
+BecomePrimaryByMagic(i, ayeVoters) ==
+ /\ \A j \in ayeVoters : /\ NotBehind(i, j)
+ /\ currentTerm[j] <= currentTerm[i]
+ /\ IsMajority(ayeVoters)
+ /\ state' = [index \in Server |-> IF index \notin ayeVoters
+ THEN state[index]
+ ELSE IF index = i THEN "Leader" ELSE "Follower"]
+ /\ currentTerm' = [index \in Server |-> IF index \in (ayeVoters \union {i})
+ THEN currentTerm[i] + 1
+ ELSE currentTerm[index]]
+ /\ UNCHANGED <<committedEntries, commitPoint, logVars>>
+
+
+\* ACTION
+\* Node i is leader and steps down for any reason.
+Stepdown(i) ==
+ /\ state[i] = "Leader"
+ /\ state' = [state EXCEPT ![i] = "Follower"]
+ /\ UNCHANGED <<committedEntries, currentTerm, commitPoint, logVars>>
+
+\* ACTION
+\* Leader i receives a client request to add one or more entries to the log.
+\* There can be multiple leaders, each in a different term. A leader writes
+\* an oplog entry in its own term.
+ClientWrite(i) ==
+ /\ state[i] = "Leader"
+ /\ \E numEntries \in 1..MaxClientWriteSize :
+ LET entry == [term |-> currentTerm[i]]
+ newEntries == [ j \in 1..numEntries |-> entry ]
+ newLog == log[i] \o newEntries
+ IN log' = [log EXCEPT ![i] = newLog]
+ /\ UNCHANGED <<committedEntries, serverVars>>
+
+UpdateTermThroughHeartbeat(i, j) ==
+ /\ currentTerm[j] > currentTerm[i]
+ /\ currentTerm' = [currentTerm EXCEPT ![i] = currentTerm[j]]
+ /\ state' = [state EXCEPT ![i] = "Follower"]
+ /\ UNCHANGED <<commitPoint, logVars>>
+
+\* ACTION
+AdvanceCommitPoint ==
+ \E leader \in Leaders :
+ \E acknowledgers \in SUBSET Server :
+ \* New commitPoint is any committed log index after current commitPoint
+ \E committedIndex \in (commitPoint[leader].index + 1)..Len(log[leader]) :
+ /\ IsMajority(acknowledgers)
+ /\ acknowledgers \subseteq Agree(leader, committedIndex)
+ \* New commitPoint is an entry written by this leader.
+ /\ LogTerm(leader, committedIndex) = currentTerm[leader]
+ \* If an acknowledger has a higher term, the leader would step down.
+ /\ \A j \in acknowledgers : currentTerm[j] <= currentTerm[leader]
+ /\ LET newCommitPoint == [
+ term |-> LogTerm(leader, committedIndex),
+ index |-> committedIndex
+ ]
+ IN /\ commitPoint' = [commitPoint EXCEPT ![leader] = newCommitPoint]
+ /\ committedEntries' = committedEntries \union {[
+ term |-> LogTerm(leader, i),
+ index |-> i
+ ] : i \in commitPoint[leader].index + 1..committedIndex}
+ /\ UNCHANGED <<electionVars, log>>
+
+\* ACTION
+\* Node i learns the commit point from j via heartbeat with term check
+LearnCommitPointWithTermCheck(i, j) ==
+ /\ LastTerm(log[i]) = commitPoint[j].term
+ /\ LearnCommitPoint(i, j)
+
+\* ACTION
+\* Node i learns the commit point from j while tailing j's oplog
+LearnCommitPointFromSyncSourceNeverBeyondLastApplied(i, j) ==
+ \* j is a potential sync source, either ahead of or equal to i's oplog
+ /\ \/ CanSyncFrom(i, j)
+ \/ log[i] = log[j]
+ /\ CommitPointLessThan(i, j)
+ \* Never beyond last applied
+ /\ LET myCommitPoint ==
+ \* If j's term is less than or equal to i's, commit point can be ahead.
+ IF commitPoint[j].term <= LastTerm(log[i])
+ THEN commitPoint[j]
+ ELSE [term |-> LastTerm(log[i]), index |-> Len(log[i])]
+ IN commitPoint' = [commitPoint EXCEPT ![i] = myCommitPoint]
+ /\ UNCHANGED <<committedEntries, electionVars, logVars>>
+
+----
+AppendOplogAction ==
+ \E i,j \in Server : AppendOplog(i, j)
+
+RollbackOplogAction ==
+ \E i,j \in Server : RollbackOplog(i, j)
+
+BecomePrimaryByMagicAction ==
+ \E i \in Server : \E ayeVoters \in SUBSET(Server) : BecomePrimaryByMagic(i, ayeVoters)
+
+StepdownAction ==
+ \E i \in Server : Stepdown(i)
+
+ClientWriteAction ==
+ \E i \in Server : ClientWrite(i)
+
+UpdateTermThroughHeartbeatAction ==
+ \E i, j \in Server : UpdateTermThroughHeartbeat(i, j)
+
+LearnCommitPointWithTermCheckAction ==
+ \E i, j \in Server : LearnCommitPointWithTermCheck(i, j)
+
+LearnCommitPointFromSyncSourceNeverBeyondLastAppliedAction ==
+ \E i, j \in Server : LearnCommitPointFromSyncSourceNeverBeyondLastApplied(i, j)
+
+----
+
+\* Defines how the variables may transition.
+\*
+\* MongoDB's commit point learning protocol has evolved as we discovered
+\* protocol bugs, see:
+\*
+\* https://conf.tlapl.us/07_-_TLAConf19_-_William_Schultz_-_Fixing_a_MongoDB_Replication_Protocol_Bug_with_TLA.pdf
+\*
+Next ==
+ \* --- Replication protocol
+ \/ AppendOplogAction
+ \/ RollbackOplogAction
+ \/ BecomePrimaryByMagicAction
+ \/ StepdownAction
+ \/ ClientWriteAction
+ \/ UpdateTermThroughHeartbeatAction
+ \*
+ \* --- Commit point learning protocol
+ \/ AdvanceCommitPoint
+ \/ LearnCommitPointWithTermCheckAction
+ \/ LearnCommitPointFromSyncSourceNeverBeyondLastAppliedAction
+
+SpecBehavior == Init /\ [][Next]_vars
+
+Liveness ==
+ /\ SF_vars(AppendOplogAction)
+ /\ SF_vars(RollbackOplogAction)
+ \* A new primary should eventually write one entry.
+ /\ WF_vars(\E i \in Server : LastTerm(log[i]) # GlobalCurrentTerm /\ ClientWrite(i))
+ \*
+ /\ WF_vars(AdvanceCommitPoint)
+ /\ SF_vars(LearnCommitPointWithTermCheckAction)
+ /\ SF_vars(LearnCommitPointFromSyncSourceNeverBeyondLastAppliedAction)
+
+\* The specification must start with the initial state and transition according
+\* to Next.
+Spec == SpecBehavior /\ Liveness
+
+----
+
+\* Properties to check
+
+TwoPrimariesInSameTerm ==
+ \E i, j \in Server :
+ /\ i # j
+ /\ currentTerm[i] = currentTerm[j]
+ /\ state[i] = "Leader"
+ /\ state[j] = "Leader"
+
+NoTwoPrimariesInSameTerm == ~TwoPrimariesInSameTerm
+
+RollbackCommitted(i) ==
+ /\ [term |-> LastTerm(log[i]), index |-> Len(log[i])] \in committedEntries
+ /\ \E j \in Server: CanRollbackOplog(i, j)
+
+NeverRollbackCommitted ==
+ \A i \in Server: ~RollbackCommitted(i)
+
+RollbackBeforeCommitPoint(i) ==
+ /\ \E j \in Server:
+ /\ CanRollbackOplog(i, j)
+ /\ \/ LastTerm(log[i]) < commitPoint[i].term
+ \/ /\ LastTerm(log[i]) = commitPoint[i].term
+ /\ Len(log[i]) <= commitPoint[i].index
+
+NeverRollbackBeforeCommitPoint == \A i \in Server: ~RollbackBeforeCommitPoint(i)
+
+\* Liveness check
+
+\* This isn't accurate for any infinite behavior specified by Spec, but it's fine
+\* for any finite behavior with the liveness we can check with the model checker.
+\* This is to check at any time, if two nodes' commit points are not the same, they
+\* will be the same eventually.
+\* This is checked after all possible rollback is done.
+CommitPointEventuallyPropagates ==
+ /\ \A i, j \in Server:
+ [](commitPoint[i] # commitPoint[j] ~>
+ <>(~ENABLED RollbackOplogAction => commitPoint[i] = commitPoint[j]))
+
+===============================================================================
diff --git a/src/mongo/tla_plus/RaftMongoWithRaftReconfig.tla b/src/mongo/tla_plus/RaftMongoWithRaftReconfig.tla
new file mode 100644
index 00000000000..aacb2903a1b
--- /dev/null
+++ b/src/mongo/tla_plus/RaftMongoWithRaftReconfig.tla
@@ -0,0 +1,263 @@
+\* Copyright 2019 MongoDB, Inc.
+\*
+\* This work is licensed under:
+\* - Creative Commons Attribution-3.0 United States License
+\* http://creativecommons.org/licenses/by/3.0/us/
+
+--------------------------------- MODULE RaftMongoWithRaftReconfig --------------------------------
+\* This is the formal specification for the Raft consensus algorithm in MongoDB.
+\* It allows reconfig using the protocol for single server membership changes described in Raft.
+\* Note that we did not choose to implement the protocol for single server membership changes
+\* described in Raft. This specification was for exploratory purposes only.
+
+EXTENDS Naturals, FiniteSets, Sequences, TLC
+
+\* The set of server IDs
+CONSTANTS Server
+
+\* Server states.
+\* Candidate is not used, but this is fine.
+CONSTANTS Follower, Candidate, Leader
+
+\* A reserved value.
+CONSTANTS Nil
+
+----
+\* Global variables
+
+\* Servers in a given config version.
+\* e.g. << {S1, S2}, {S1, S2, S3} >>
+VARIABLE configs
+
+\* The set of log entries that have been acknowledged as committed, i.e.
+\* "immediately committed" entries. It does not include "prefix committed"
+\* entries, which are allowed to roll back on minority nodes.
+VARIABLE committedEntries
+
+----
+\* The following variables are all per server (functions with domain Server).
+
+\* The server's term number.
+VARIABLE currentTerm
+
+\* The server's state (Follower, Candidate, or Leader).
+VARIABLE state
+
+serverVars == <<currentTerm, state>>
+
+\* A Sequence of log entries. The index into this sequence is the index of the
+\* log entry. Unfortunately, the Sequence module defines Head(s) as the entry
+\* with index 1, so be careful not to use that!
+VARIABLE log
+logVars == <<log, committedEntries>>
+
+\* End of per server variables.
+----
+
+\* All variables; used for stuttering (asserting state hasn't changed).
+vars == <<serverVars, logVars, configs>>
+
+----
+\* Helpers
+
+\* The term of the last entry in a log, or 0 if the log is empty.
+GetTerm(xlog, index) == IF index = 0 THEN 0 ELSE xlog[index].term
+LogTerm(i, index) == GetTerm(log[i], index)
+LastTerm(xlog) == GetTerm(xlog, Len(xlog))
+
+\* Return the minimum value from a set, or undefined if the set is empty.
+Min(s) == CHOOSE x \in s : \A y \in s : x <= y
+\* Return the maximum value from a set, or undefined if the set is empty.
+Max(s) == CHOOSE x \in s : \A y \in s : x >= y
+
+\* The config version in the node's last entry.
+GetConfigVersion(i) == log[i][Len(log[i])].configVersion
+
+\* Gets the node's first entry with a given config version.
+GetConfigEntry(i, configVersion) == LET configEntries == {index \in 1..Len(log[i]) :
+ log[i][index].configVersion = configVersion}
+ IN Min(configEntries)
+
+\* The servers that are in the same config as i.
+ServerViewOn(i) == configs[GetConfigVersion(i)]
+
+\* The set of all quorums. This just calculates simple majorities, but the only
+\* important property is that every quorum overlaps with every other.
+Quorum(me) == {sub \in SUBSET(ServerViewOn(me)) : Cardinality(sub) * 2 > Cardinality(ServerViewOn(me))}
+
+----
+\* Define initial values for all variables
+InitServerVars == /\ currentTerm = [i \in Server |-> 0]
+ /\ state = [i \in Server |-> Follower]
+InitLogVars == /\ log = [i \in Server |-> << [term |-> 0, configVersion |-> 1] >>]
+ /\ committedEntries = {[term |-> 0, index |-> 1]}
+InitConfigs == configs = << Server >>
+Init == /\ InitServerVars
+ /\ InitLogVars
+ /\ InitConfigs
+
+----
+\* Message handlers
+\* i = recipient, j = sender, m = message
+
+AppendOplog(i, j) ==
+ /\ state[i] = Follower \* Disable primary catchup and draining
+ /\ j \in ServerViewOn(i) \* j is in the config of i.
+ /\ Len(log[i]) < Len(log[j])
+ /\ LastTerm(log[i]) = LogTerm(j, Len(log[i]))
+ /\ log' = [log EXCEPT ![i] = Append(log[i], log[j][Len(log[i]) + 1])]
+ /\ UNCHANGED <<serverVars, committedEntries, configs>>
+
+CanRollbackOplog(i, j) ==
+ /\ j \in ServerViewOn(i) \* j is in the config of i.
+ /\ Len(log[i]) > 0
+ /\ \* The log with later term is more up-to-date
+ LastTerm(log[i]) < LastTerm(log[j])
+ /\
+ \/ Len(log[i]) > Len(log[j])
+ \* There seems no short-cut of OR clauses, so I have to specify the negative case
+ \/ /\ Len(log[i]) <= Len(log[j])
+ /\ LastTerm(log[i]) /= LogTerm(j, Len(log[i]))
+
+RollbackOplog(i, j) ==
+ /\ CanRollbackOplog(i, j)
+ \* Rollback 1 oplog entry
+ /\ LET new == [index2 \in 1..(Len(log[i]) - 1) |-> log[i][index2]]
+ IN log' = [log EXCEPT ![i] = new]
+ /\ UNCHANGED <<serverVars, committedEntries, configs>>
+
+\* The set of nodes in my config that has log[me][logIndex] in their oplog
+Agree(me, logIndex) ==
+ { node \in ServerViewOn(me) :
+ /\ Len(log[node]) >= logIndex
+ /\ LogTerm(me, logIndex) = LogTerm(node, logIndex) }
+
+NotBehind(me, j) == \/ LastTerm(log[me]) > LastTerm(log[j])
+ \/ /\ LastTerm(log[me]) = LastTerm(log[j])
+ /\ Len(log[me]) >= Len(log[j])
+
+\* ACTION
+\* i = the new primary node.
+BecomePrimaryByMagic(i, ayeVoters) ==
+ /\ \A j \in ayeVoters : /\ i \in ServerViewOn(j)
+ /\ NotBehind(i, j)
+ /\ currentTerm[j] <= currentTerm[i]
+ /\ ayeVoters \in Quorum(i)
+ /\ state' = [index \in Server |-> IF index \notin ayeVoters
+ THEN state[index]
+ ELSE IF index = i THEN Leader ELSE Follower]
+ /\ currentTerm' = [index \in Server |-> IF index \in (ayeVoters \union {i})
+ THEN currentTerm[i] + 1
+ ELSE currentTerm[index]]
+ /\ UNCHANGED <<logVars, configs>>
+
+\* ACTION
+\* Leader i receives a client request to add v to the log.
+ClientWrite(i) ==
+ /\ state[i] = Leader
+ /\ LET entry == [term |-> currentTerm[i], configVersion |-> GetConfigVersion(i)]
+ newLog == Append(log[i], entry)
+ IN log' = [log EXCEPT ![i] = newLog]
+ /\ UNCHANGED <<serverVars, committedEntries, configs>>
+
+\* ACTION
+\* Commit the latest log entry on a primary.
+AdvanceCommitPoint ==
+ \E leader \in Server : \E acknowledgers \in SUBSET Server :
+ /\ state[leader] = Leader
+ /\ acknowledgers \subseteq Agree(leader, Len(log[leader]))
+ /\ acknowledgers \in Quorum(leader)
+ \* If we comment out the following line, a replicated log entry from old primary will voilate the safety.
+ \* [ P (2), S (), S ()]
+ \* [ S (2), S (), P (3)]
+ \* [ S (2), S (2), P (3)] !!! the log from term 2 shouldn't be considered as committed.
+ /\ LogTerm(leader, Len(log[leader])) = currentTerm[leader]
+ \* If an acknowledger has a higher term, the leader would step down.
+ /\ \A j \in acknowledgers : currentTerm[j] <= currentTerm[leader]
+ /\ committedEntries' = committedEntries \union {[term |-> LastTerm(log[leader]), index |-> Len(log[leader])]}
+ /\ UNCHANGED <<serverVars, log, configs>>
+
+UpdateTermThroughHeartbeat(i, j) ==
+ /\ j \in ServerViewOn(i) \* j is in the config of i.
+ /\ currentTerm[j] > currentTerm[i]
+ /\ currentTerm' = [currentTerm EXCEPT ![i] = currentTerm[j]]
+ /\ state' = [state EXCEPT ![i] = IF ~(state[i] = Leader) THEN state[i] ELSE Follower]
+ /\ UNCHANGED <<logVars, configs>>
+
+Reconfig(i, newConfig) ==
+ /\ state[i] = Leader
+ /\ i \in newConfig
+ \* Only support single node addition/removal.
+ /\ Cardinality(ServerViewOn(i) \ newConfig) + Cardinality(newConfig \ ServerViewOn(i)) <= 1
+ \* The config entry must be committed.
+ /\ LET configEntry == GetConfigEntry(i, GetConfigVersion(i))
+ IN [term |-> log[i][configEntry].term, index |-> configEntry] \in committedEntries
+ \* The primary must have committed an entry in its current term.
+ /\ \E entry \in committedEntries : entry.term = currentTerm[i]
+ /\ configs' = Append(configs, newConfig)
+ /\ LET entry == [term |-> currentTerm[i], configVersion |-> Len(configs) + 1]
+ newLog == Append(log[i], entry)
+ IN log' = [log EXCEPT ![i] = newLog]
+ /\ UNCHANGED <<serverVars, committedEntries>>
+
+----
+AppendOplogAction ==
+ \E i,j \in Server : AppendOplog(i, j)
+
+RollbackOplogAction ==
+ \E i,j \in Server : RollbackOplog(i, j)
+
+BecomePrimaryByMagicAction ==
+ \E i \in Server : \E ayeVoters \in SUBSET(Server) : BecomePrimaryByMagic(i, ayeVoters)
+
+ClientWriteAction ==
+ \E i \in Server : ClientWrite(i)
+
+UpdateTermThroughHeartbeatAction ==
+ \E i,j \in Server : UpdateTermThroughHeartbeat(i, j)
+
+ReconfigAction ==
+ \E i \in Server : \E newConfig \in SUBSET(Server) : Reconfig(i, newConfig)
+
+----
+\* Defines how the variables may transition.
+Next ==
+ \* --- Replication protocol
+ \/ AppendOplogAction
+ \/ RollbackOplogAction
+ \/ BecomePrimaryByMagicAction
+ \/ ClientWriteAction
+ \/ AdvanceCommitPoint
+ \/ ReconfigAction
+ \/ UpdateTermThroughHeartbeatAction
+
+Liveness ==
+ /\ SF_vars(AppendOplogAction)
+ /\ SF_vars(RollbackOplogAction)
+ \* A new primary should eventually write one entry.
+ /\ WF_vars(\E i \in Server : LastTerm(log[i]) # currentTerm[i] /\ ClientWrite(i))
+ \* /\ WF_vars(ClientWriteAction)
+
+\* The specification must start with the initial state and transition according
+\* to Next.
+Spec == Init /\ [][Next]_vars /\ Liveness
+
+\* RollbackCommitted and NeverRollbackCommitted are not actions.
+\* They are used for verification.
+RollbackCommitted(i) ==
+ /\ [term |-> LastTerm(log[i]), index |-> Len(log[i])] \in committedEntries
+ /\ \E j \in Server: CanRollbackOplog(i, j)
+
+NeverRollbackCommitted ==
+ \A i \in Server: ~RollbackCommitted(i)
+
+TwoPrimariesInSameTerm ==
+ \E i, j \in Server :
+ /\ i # j
+ /\ currentTerm[i] = currentTerm[j]
+ /\ state[i] = Leader
+ /\ state[j] = Leader
+
+NoTwoPrimariesInSameTerm == ~TwoPrimariesInSameTerm
+
+===============================================================================
diff --git a/src/mongo/tla_plus/download-tlc.sh b/src/mongo/tla_plus/download-tlc.sh
new file mode 100755
index 00000000000..b63944a7231
--- /dev/null
+++ b/src/mongo/tla_plus/download-tlc.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+
+# Downloads TLC, which is the model-checker for the TLA+ formal specifications in this directory.
+
+echo "Downloading tla2tools.jar"
+curl -fLO https://github.com/tlaplus/tlaplus/releases/download/v1.7.0/tla2tools.jar
diff --git a/src/mongo/tla_plus/model-check.sh b/src/mongo/tla_plus/model-check.sh
new file mode 100755
index 00000000000..cc53169ca28
--- /dev/null
+++ b/src/mongo/tla_plus/model-check.sh
@@ -0,0 +1,40 @@
+#!/bin/sh
+
+# Execute TLC, the TLA+ model-checker, on a TLA+ specification and model config. Call like:
+#
+# ./model-check.sh RaftMongo
+#
+# Requires Java 11. You can set the JAVA_BINARY environment variable to the full path to java.
+
+if [ "$#" -ne 1 ]; then
+ echo "Usage: $0 SPEC_DIRECTORY" >&2
+ exit 1
+fi
+if ! [ -e "$1" ]; then
+ echo "Directory $1 not found" >&2
+ exit 1
+fi
+if ! [ -d "$1" ]; then
+ echo "$1 not a directory" >&2
+ exit 1
+fi
+if ! [ -f "tla2tools.jar" ]; then
+ echo "No tla2tools.jar, run download-tlc.sh first"
+ exit 1
+fi
+
+TLA_FILE="MC$1.tla"
+if ! [ -f "$1/$TLA_FILE" ]; then
+ echo "$1/$TLA_FILE does not exist" >&2
+ exit 1
+fi
+
+if [ -z "$JAVA_BINARY" ]; then
+ JAVA_BINARY=java
+else
+ echo "Using java binary [$JAVA_BINARY]"
+fi
+
+cd "$1"
+# Defer liveness checks to the end with -lncheck, for speed.
+"$JAVA_BINARY" -XX:+UseParallelGC -Dtlc2.tool.fp.FPSet.impl=tlc2.tool.fp.OffHeapDiskFPSet -Dutil.ExecutionStatisticsCollector.id=10f53a1c957c11ea94a033245b683b65 -cp ../tla2tools.jar tlc2.TLC -lncheck final -workers auto "$TLA_FILE"