diff options
author | A. Jesse Jiryu Davis <jesse@mongodb.com> | 2021-08-03 13:36:39 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2021-08-04 13:46:13 +0000 |
commit | 35c2b631b6efa1af9909d1c6edaef10d7dbacc1e (patch) | |
tree | 411cf344cb5a3cfbece0c8e43191405af21bb967 /src/mongo/tla_plus | |
parent | d0eb0b5775e46895ab5e884db2af70632615c70e (diff) | |
download | mongo-35c2b631b6efa1af9909d1c6edaef10d7dbacc1e.tar.gz |
SERVER-59026 Move TLA+ dir from repl/ to mongo/
Diffstat (limited to 'src/mongo/tla_plus')
-rw-r--r-- | src/mongo/tla_plus/.gitignore | 3 | ||||
-rw-r--r-- | src/mongo/tla_plus/MongoReplReconfig/MCMongoReplReconfig.cfg | 34 | ||||
-rw-r--r-- | src/mongo/tla_plus/MongoReplReconfig/MCMongoReplReconfig.tla | 19 | ||||
-rw-r--r-- | src/mongo/tla_plus/MongoReplReconfig/MongoReplReconfig.tla | 492 | ||||
-rw-r--r-- | src/mongo/tla_plus/MultiTenantMigrations/MCMultiTenantMigrations.cfg | 60 | ||||
-rw-r--r-- | src/mongo/tla_plus/MultiTenantMigrations/MCMultiTenantMigrations.tla | 15 | ||||
-rw-r--r-- | src/mongo/tla_plus/MultiTenantMigrations/MultiTenantMigrations.tla | 497 | ||||
-rw-r--r-- | src/mongo/tla_plus/README.md | 23 | ||||
-rw-r--r-- | src/mongo/tla_plus/RaftMongo/MCRaftMongo.cfg | 30 | ||||
-rw-r--r-- | src/mongo/tla_plus/RaftMongo/MCRaftMongo.tla | 13 | ||||
-rw-r--r-- | src/mongo/tla_plus/RaftMongo/RaftMongo.tla | 331 | ||||
-rw-r--r-- | src/mongo/tla_plus/RaftMongoWithRaftReconfig.tla | 263 | ||||
-rwxr-xr-x | src/mongo/tla_plus/download-tlc.sh | 6 | ||||
-rwxr-xr-x | src/mongo/tla_plus/model-check.sh | 40 |
14 files changed, 1826 insertions, 0 deletions
diff --git a/src/mongo/tla_plus/.gitignore b/src/mongo/tla_plus/.gitignore new file mode 100644 index 00000000000..a9dcc645fd5 --- /dev/null +++ b/src/mongo/tla_plus/.gitignore @@ -0,0 +1,3 @@ +**/*.toolbox +**/states/ +tla2tools.jar diff --git a/src/mongo/tla_plus/MongoReplReconfig/MCMongoReplReconfig.cfg b/src/mongo/tla_plus/MongoReplReconfig/MCMongoReplReconfig.cfg new file mode 100644 index 00000000000..beb09332519 --- /dev/null +++ b/src/mongo/tla_plus/MongoReplReconfig/MCMongoReplReconfig.cfg @@ -0,0 +1,34 @@ +\* Config file to run the TLC model-checker on MongoReplReconfig.tla. +\* See MongoReplReconfig.tla for instructions. + +SPECIFICATION Spec +CONSTANTS +Leader = Leader +Follower = Follower +Down = Down + +\* The set of server IDs. Adjust to test different replica set sizes. +CONSTANT Server = {n1, n2, n3} + +CONSTANTS +\* The longest oplog any server can reach during model-checking. +MaxLogLen = 2 + +\* The number of election terms to simulate during model-checking. +MaxTerm = 3 + +\* The number of reconfigs allowed during model-checking. +MaxConfigVersion = 3 + +\* The number of commit points advanced during model-checking. +MaxCommittedEntries = 3 + +\* Constrain the model to be finite and leverage the symmetry to reduce +\* the state space. +\* Symmetry checking may invalidate liveness checking in certain cases. +SYMMETRY ServerSymmetry +CONSTRAINT StateConstraint + +\* Invariants and properties checked by TLC. +INVARIANT ElectionSafety +PROPERTY NeverRollbackCommitted diff --git a/src/mongo/tla_plus/MongoReplReconfig/MCMongoReplReconfig.tla b/src/mongo/tla_plus/MongoReplReconfig/MCMongoReplReconfig.tla new file mode 100644 index 00000000000..5ebe4859678 --- /dev/null +++ b/src/mongo/tla_plus/MongoReplReconfig/MCMongoReplReconfig.tla @@ -0,0 +1,19 @@ +---- MODULE MCMongoReplReconfig ---- +\* This module defines MCMongoReplReconfig.tla constants/constraints for model-checking. + +EXTENDS MongoReplReconfig + +(**************************************************************************************************) +(* State Constraint. Used for model checking only. *) +(**************************************************************************************************) + +CONSTANTS MaxTerm, MaxLogLen, MaxConfigVersion, MaxCommittedEntries + +StateConstraint == \A s \in Server : + /\ currentTerm[s] <= MaxTerm + /\ Len(log[s]) <= MaxLogLen + /\ configVersion[s] <= MaxConfigVersion + /\ Cardinality(immediatelyCommitted) <= MaxCommittedEntries + +ServerSymmetry == Permutations(Server) +============================================================================= diff --git a/src/mongo/tla_plus/MongoReplReconfig/MongoReplReconfig.tla b/src/mongo/tla_plus/MongoReplReconfig/MongoReplReconfig.tla new file mode 100644 index 00000000000..fe3061827be --- /dev/null +++ b/src/mongo/tla_plus/MongoReplReconfig/MongoReplReconfig.tla @@ -0,0 +1,492 @@ +\* Copyright 2019 MongoDB, Inc. +\* +\* This work is licensed under: +\* - Creative Commons Attribution-3.0 United States License +\* http://creativecommons.org/licenses/by/3.0/us/ + +----------------------------- MODULE MongoReplReconfig ----------------------------- +\* +\* A specification of reconfiguration in the MongoDB replication protocol. +\* This spec only allows single node changes. +\* + + +EXTENDS Integers, FiniteSets, Sequences, TLC + +\* The set of server IDs. +CONSTANTS Server + +\* Server states. +CONSTANTS Leader, Follower, Down + +(**************************************************************************************************) +(* Global variables *) +(**************************************************************************************************) + +\* Set of all immediately committed entries. +\* Each element of the set is a record e.g. [index |-> ..., term |-> ..., configVersion |-> ...] +\* This set does not include "prefix committed" entries. +VARIABLE immediatelyCommitted + +(**************************************************************************************************) +(* Per server variables. *) +(* *) +(* These are all functions with domain Server. *) +(**************************************************************************************************) + +\* The server's term number. +VARIABLE currentTerm + +\* The server's state (Leader, Follower or Down). +VARIABLE state + +serverVars == <<currentTerm, state>> + +\* A sequence of log entries. The index into this sequence is the index of the log entry. +VARIABLE log + +\* +\* Reconfig related variables. +\* + +\* A server's current config. A config is just a set of servers, i.e. an element of SUBSET Server. +VARIABLE config + +\* The config version of a node's current config. +VARIABLE configVersion + +\* The term in which the current config on a node was written in i.e. the term of the primary +\* that moved to that config. +VARIABLE configTerm + +configVars == <<config, configVersion, configTerm>> + +vars == <<serverVars, log, immediatelyCommitted, config, configVersion, configTerm>> + +------------------------------------------------------------------------------------------- + +(**************************************************************************************************) +(* Generic helper operators *) +(**************************************************************************************************) + +\* The set of all quorums of a given set. +Quorums(S) == {i \in SUBSET(S) : Cardinality(i) * 2 > Cardinality(S)} + +\* Return the minimum value from a set, or undefined if the set is empty. +Min(s) == CHOOSE x \in s : \A y \in s : x <= y + +\* Return the maximum value from a set, or undefined if the set is empty. +Max(s) == CHOOSE x \in s : \A y \in s : x >= y + +\* Return the range of a given function. +Range(f) == {f[x] : x \in DOMAIN f} + +\* Is a sequence empty. +Empty(s) == Len(s) = 0 + +\* Alive nodes in a set. +AliveNodes(s) == { n \in s : state[n] # Down } + +------------------------------------------------------------------------------------------- + +(******************************************************************************) +(* Next state actions. *) +(* *) +(* This section defines the core steps of the algorithm, along with some *) +(* related helper definitions/operators. We annotate the main actions with *) +(* an [ACTION] specifier to distinguish them from auxiliary, helper operators.*) +(******************************************************************************) + +\* The term of the last entry in a log, or 0 if the log is empty. +LastTerm(xlog) == IF Len(xlog) = 0 THEN 0 ELSE xlog[Len(xlog)].term +GetTerm(xlog, index) == IF index = 0 THEN 0 ELSE xlog[index].term +LogTerm(i, index) == GetTerm(log[i], index) + +\* Is it possible for log 'i' to roll back against log 'j'. +\* If this is true, it implies that log 'i' should remove entries from the end of its log. +CanRollback(i, j) == + /\ Len(log[i]) > 0 + /\ \* The log with later term is more up-to-date. + LastTerm(log[i]) < LastTerm(log[j]) + /\ \/ Len(log[i]) > Len(log[j]) + \/ /\ Len(log[i]) <= Len(log[j]) + /\ LastTerm(log[i]) /= LogTerm(j, Len(log[i])) + +\* Is the config of node i considered 'newer' than the config of node j. This is the condition for +\* node j to accept the config of node i. +HasSameConfig(i, j) == + /\ configTerm[i] = configTerm[j] + /\ configVersion[i] = configVersion[j] + +\* Exchange terms between two nodes and step down the primary if needed. +UpdateTerms(i, j) == + \* Update terms of sender and receiver i.e. to simulate an RPC request and response (heartbeat). + /\ currentTerm' = [currentTerm EXCEPT ![i] = Max({currentTerm[i], currentTerm[j]}), + ![j] = Max({currentTerm[i], currentTerm[j]})] + \* May update state of sender or receiver. + /\ state' = [state EXCEPT ![j] = IF currentTerm[j] < currentTerm[i] THEN Follower ELSE state[j], + ![i] = IF currentTerm[i] < currentTerm[j] THEN Follower ELSE state[i] ] + +UpdateTermsOnNodes(i, j) == /\ UpdateTerms(i, j) + /\ UNCHANGED <<log, immediatelyCommitted, configVars>> +(******************************************************************************) +(* [ACTION] *) +(* *) +(* Node 'i' rolls back against the log of node 'j'. *) +(******************************************************************************) +RollbackEntries(i, j) == + /\ CanRollback(i, j) + /\ j \in config[i] + \* Roll back one log entry. + /\ log' = [log EXCEPT ![i] = SubSeq(log[i], 1, Len(log[i])-1)] + /\ UNCHANGED <<immediatelyCommitted, configVars, serverVars>> + +(******************************************************************************) +(* [ACTION] *) +(* *) +(* Node 'i' gets a new log entry from node 'j'. *) +(******************************************************************************) +GetEntry(i, j) == + /\ j \in config[i] + /\ state[i] = Follower + \* Node j must have more entries than node i. + /\ Len(log[j]) > Len(log[i]) + \* Ensure that the entry at the last index of node i's log must match the entry at + \* the same index in node j's log. If the log of node i is empty, then the check + \* trivially passes. This is the essential 'log consistency check'. + /\ LET logOk == IF Empty(log[i]) THEN TRUE + ELSE LogTerm(j, Len(log[i])) = LastTerm(log[i]) IN + /\ logOk \* log consistency check + /\ LET newEntryIndex == Len(log[i]) + 1 + newEntry == log[j][newEntryIndex] + newLog == Append(log[i], newEntry) IN + /\ log' = [log EXCEPT ![i] = newLog] + /\ UpdateTerms(i, j) + /\ UNCHANGED <<immediatelyCommitted, configVars>> + + +\* Check whether the entry at "index" on "primary" is committed in the primary's current config. +IsCommitted(index, primary) == + \* The entry was written by this leader. + /\ LogTerm(primary, index) = currentTerm[primary] + /\ \E quorum \in Quorums(config[primary]): + \* all nodes have this log entry and are in the term of the leader. + \A s \in quorum: + /\ Len(log[s]) >= index + /\ log[s][index] = log[primary][index] \* they have the entry. + /\ currentTerm[s] = currentTerm[primary] \* they are in the same term. + +(******************************************************************************) +(* [ACTION] *) +(* *) +(* A leader i commits its newest log entry. It commits it according to *) +(* its own config's notion of a quorum. *) +(******************************************************************************) +CommitEntry(i) == + \* Must have some entries to commit. + /\ ~Empty(log[i]) + \* This node is leader. + /\ state[i] = Leader + /\ IsCommitted(Len(log[i]), i) + /\ immediatelyCommitted' = immediatelyCommitted \cup + {[index |-> Len(log[i]), + term |-> currentTerm[i], + configVersion |-> configVersion[i]]} + /\ UNCHANGED <<serverVars, log, configVars>> + +\* Can node 'voter' currently cast a vote for node 'candidate' in term 'term'. +CanVoteFor(voter, candidate, term) == + LET logOk == + \/ LastTerm(log[candidate]) > LastTerm(log[voter]) + \/ /\ LastTerm(log[candidate]) = LastTerm(log[voter]) + /\ Len(log[candidate]) >= Len(log[voter]) IN + \* Nodes can only vote once per term, and they will never + \* vote for someone with a lesser term than their own. + /\ currentTerm[voter] < term + \* Only vote for someone if their config is the same as the vote's. + /\ HasSameConfig(candidate, voter) + /\ logOk + +(******************************************************************************) +(* [ACTION] *) +(* *) +(* Node 'i' automatically becomes a leader, if eligible. *) +(******************************************************************************) +BecomeLeader(i) == + \* Primaries make decisions based on their current configuration. + LET newTerm == currentTerm[i] + 1 IN + \E voteQuorum \in Quorums(config[i]) : + /\ i \in config[i] \* only become a leader if you are a part of your config. + /\ i \in voteQuorum \* The new leader should vote for itself. + /\ \A v \in voteQuorum : CanVoteFor(v, i, newTerm) + \* Update the terms of each voter. + /\ currentTerm' = [s \in Server |-> IF s \in voteQuorum THEN newTerm ELSE currentTerm[s]] + /\ state' = [s \in Server |-> + IF s = i THEN Leader + \* All voters should revert to secondary state. + ELSE IF s \in voteQuorum THEN Follower + ELSE state[s]] + \* Update config's term on step-up. + /\ configTerm' = [configTerm EXCEPT ![i] = newTerm] + /\ UNCHANGED <<log, config, configVersion, immediatelyCommitted>> + + +\* A quorum of nodes have received this config. +\* With bumping the config term on step-up, this effectively gathers the votes from +\* nodes, so that nodes in earlier configs cannot win elections. +ConfigQuorumCheck(self, s) == /\ configVersion[self] = configVersion[s] + /\ configTerm[self] = configTerm[s] + +\* Was an op was committed in the current config of primary. +\* +\* Before a primary can consider a config "safe", it must make sure that any entries that were +\* acknowledged as "committed" in a prior config are now "committed" in the current config. +\* If moving from Ci -> Cj, it can do this by checking that the latest entry committed in Ci +\* is now committed in Cj. +OpCommittedInConfig(primary) == + \* The primary has at least committed one entry in its term if there's any entry committed + \* in earlier terms. + /\ \/ immediatelyCommitted = {} + \/ \E e \in immediatelyCommitted: IsCommitted(e.index, primary) + \* All entries committed in the primary's term have been committed in the current config. + /\ \A e \in immediatelyCommitted: + e.term = currentTerm[primary] => IsCommitted(e.index, primary) + +\* Has the node talked to a quorum as primary? +TermQuorumCheck(self, s) == currentTerm[self] >= currentTerm[s] + +\* Is the config on node i currently "safe". +ConfigIsSafe(i) == + /\ \E q \in Quorums(config[i]): + \A s \in q : /\ TermQuorumCheck(i, s) + /\ ConfigQuorumCheck(i, s) + /\ OpCommittedInConfig(i) + +(******************************************************************************) +(* [ACTION] *) +(* *) +(* A reconfig occurs on node i. The node must currently be a leader. *) +(******************************************************************************) +Reconfig(i) == + /\ state[i] = Leader + \* Only allow a new config to be installed if the current config is "safe". + /\ ConfigIsSafe(i) + \* Pick some arbitrary subset of servers to reconfig to. + \* Make sure to include this node in the new config, though. + /\ \E newConfig \in SUBSET Server : + \* Add or remove a single node. + /\ \/ \E n \in newConfig : newConfig \ {n} = config[i] \* add 1. + \/ \E n \in config[i] : config[i] \ {n} = newConfig \* remove 1. + /\ i \in newConfig + \* Require that at least a quorum of nodes in the new config are not down. + /\ AliveNodes(newConfig) \in Quorums(newConfig) + \* The config on this node takes effect immediately + /\ config' = [config EXCEPT ![i] = newConfig] + \* Record the term of the primary that wrote this config. + /\ configTerm' = [configTerm EXCEPT ![i] = currentTerm[i]] + \* Increment the local config version. Here we do not assume that config versions + \* are globally unique. + /\ configVersion' = [configVersion EXCEPT ![i] = @ + 1] + /\ UNCHANGED <<serverVars, log, immediatelyCommitted>> + +\* [ACTION] +\* Node i sends its current config to node j. It is only accepted if the config is newer. +SendConfig(sender, receiver) == + \* Only update config if the received config is newer and its term is >= than your current term. + /\ \/ configTerm[sender] > configTerm[receiver] + \/ /\ configTerm[sender] = configTerm[receiver] + /\ configVersion[sender] > configVersion[receiver] + \* A node can learn the config even if it has voted for a higher term. + \* This diverges from Raft but is safe because bumping config term on stepup + \* and ConfigQuorumCheck serve the election of the config consensus together with + \* the data consensus election. They guarantee that old configs cannot elect a primary + \* once ConfigIsSafe=True, no matter whether the old configs propagate to voters of + \* higher terms. + \* /\ configTerm[sender] >= currentTerm[receiver] + /\ config' = [config EXCEPT ![receiver] = config[sender]] + /\ configVersion' = [configVersion EXCEPT ![receiver] = configVersion[sender]] + /\ configTerm' = [configTerm EXCEPT ![receiver] = configTerm[sender]] + /\ UpdateTerms(sender, receiver) + /\ UNCHANGED <<log, immediatelyCommitted>> + +(******************************************************************************) +(* [ACTION] *) +(* *) +(* Shut down a node. Shutdown is used to check liveness properties. *) +(******************************************************************************) +ShutDown(i) == + /\ state[i] # Down + /\ \A s \in Server: + /\ s \in config[s] \* The node isn't removed. + \* This spec assumes there is never a majority of an active config down, for + \* the sake of checking liveness properties so long as that condition holds. + /\ { n \in config[s]: state[n] # Down } \ {i} \in Quorums(config[s]) + /\ state' = [state EXCEPT ![i] = Down] + /\ UNCHANGED <<currentTerm, immediatelyCommitted, log, configVars>> + +(******************************************************************************) +(* [ACTION] *) +(* *) +(* Node 'i', a primary, handles a new client request and places the entry in *) +(* its log. *) +(******************************************************************************) +ClientRequest(i) == + /\ state[i] = Leader + /\ log' = [log EXCEPT ![i] = Append(@, [term |-> currentTerm[i]])] + /\ UNCHANGED <<serverVars, immediatelyCommitted, configVars>> + +------------------------------------------------------------------------------------------- + +(**************************************************************************************************) +(* Miscellaneous properties for exploring/understanding the spec. *) +(**************************************************************************************************) + +\* Are there two primaries in the current state. +TwoPrimaries == \E s, t \in Server : s # t /\ state[s] = Leader /\ state[s] = state[t] + +NPrimaries(n) == + \E prims \in SUBSET Server : + /\ \A s \in prims : state[s] = Leader + /\ Cardinality(prims) = n + +\* Are there 'n' concurrent, differing configs active on some set of nodes in +\* the current state. +NConcurrentConfigs(n) == + \E S \in SUBSET Server : + /\ Cardinality(S) = n + /\ \A x, y \in S : x # y => config[x] # config[y] + +\* The set of all currently installed configs in the system. +InstalledConfigs == Range(config) + +\* Do all quorums of set x and set y share at least one overlapping node. +QuorumsOverlap(x, y) == \A qx \in Quorums(x), qy \in Quorums(y) : qx \cap qy # {} + +\* Is a given config "active" i.e. can it form a quorum. +ActiveConfig(cfg) == \E Q \in Quorums(cfg) : \A s \in Q : config[s] = cfg + +\* The set of all active configs. +ActiveConfigs == {c \in InstalledConfigs : ActiveConfig(c)} + +\* For all installed configs, do their quorums overlap. +InstalledConfigsOverlap == \A x,y \in InstalledConfigs : QuorumsOverlap(x, y) + +\* For all active configs, do their quorums overlap. +ActiveConfigsOverlap == \A x,y \in ActiveConfigs : QuorumsOverlap(x, y) + +\* Property asserting that there is never more than 1 active config at a time. +AtMostOneActiveConfig == Cardinality(ActiveConfigs) <= 1 + +(**************************************************************************************************) +(* Correctness Properties *) +(**************************************************************************************************) + +\* The set of all log entries in a given log i.e. the set of all <<index, term>> +\* pairs that appear in the log. +LogEntries(xlog) == {<<i, xlog[i].term>> : i \in DOMAIN xlog} + +\* Is <<index, term>> in the given log. +EntryInLog(xlog, index, term) == <<index, term>> \in LogEntries(xlog) + +\* Is 'xlog' a prefix of 'ylog'. +IsPrefix(xlog, ylog) == + /\ Len(xlog) <= Len(ylog) + /\ xlog = SubSeq(ylog, 1, Len(xlog)) + +TwoPrimariesInSameTerm == + \E i, j \in Server : + /\ i # j + /\ currentTerm[i] = currentTerm[j] + /\ state[i] = Leader + /\ state[j] = Leader + +\* There should be at most one leader per term. +ElectionSafety == ~TwoPrimariesInSameTerm + +ConfigVersionIncreasesWithTerm == + ~(\E i, j \in Server : + /\ i # j + /\ configVersion[i] > configVersion[j] + /\ configTerm[i] < configTerm[j] + ) + +\* Only uncommitted entries are allowed to be deleted from logs. +RollbackCommitted == \E s \in Server : + \E e \in immediatelyCommitted : + /\ EntryInLog(log[s], e.index, e.term) + \* And the entry got rolled back. + /\ Len(log'[s]) < e.index + +NeverRollbackCommitted == [][~RollbackCommitted]_vars + +\* At any time, some node can always become a leader. +ElectableNodeExists == \E s \in Server : ENABLED BecomeLeader(s) + +(**************************************************************************************************) +(* Liveness properties *) +(**************************************************************************************************) +AnyNodeCanRollBack == \E s \in Server : + \E syncSource \in Server: ENABLED RollbackEntries(syncSource, s) + +ConfigEventuallyPropagates == <>( + \/ AnyNodeCanRollBack + \/ \A i, j \in Server: + \/ i \notin config[j] + \/ state[i] = Down + \/ configVersion[i] = configVersion[j] +) + +ElectableNodeEventuallyExists == <>( + \/ AnyNodeCanRollBack + \/ \E s \in Server : state[s] = Leader) + +(**************************************************************************************************) +(* Spec definition *) +(**************************************************************************************************) +Init == + \* Server variables. + /\ currentTerm = [i \in Server |-> 0] + /\ state = [i \in Server |-> Follower] + /\ log = [i \in Server |-> << >>] + \* Reconfig variables. + \* Initially, all nodes start out with the same view of the config. + \* We allow an initial config to be any non-empty subset of the current servers. + /\ \E initConfig \in (SUBSET Server) : + /\ initConfig # {} + /\ config = [i \in Server |-> initConfig] + /\ configVersion = [i \in Server |-> 0] + /\ configTerm = [i \in Server |-> 0] + /\ immediatelyCommitted = {} + +BecomeLeaderAction == \E s \in AliveNodes(Server) : BecomeLeader(s) +ClientRequestAction == \E s \in AliveNodes(Server) : ClientRequest(s) +GetEntryAction == \E s, t \in AliveNodes(Server) : GetEntry(s, t) +RollbackEntriesAction == \E s, t \in AliveNodes(Server) : RollbackEntries(s, t) +ReconfigAction == \E s \in AliveNodes(Server) : Reconfig(s) +SendConfigAction == \E s,t \in AliveNodes(Server) : SendConfig(s, t) +CommitEntryAction == \E s \in AliveNodes(Server) : CommitEntry(s) +ShutDownAction == \E s \in AliveNodes(Server) : ShutDown(s) +UpdateTermsAction == \E s, t \in AliveNodes(Server) : UpdateTermsOnNodes(s, t) + +Next == + \/ BecomeLeaderAction + \/ ClientRequestAction + \/ GetEntryAction + \/ RollbackEntriesAction + \/ ReconfigAction + \/ SendConfigAction + \/ CommitEntryAction + \/ ShutDownAction + \/ UpdateTermsAction + +Liveness == + /\ WF_vars(BecomeLeaderAction) + /\ WF_vars(SendConfigAction) + /\ WF_vars(UpdateTermsAction) + /\ WF_vars(GetEntryAction) + /\ WF_vars(RollbackEntriesAction) + /\ WF_vars(CommitEntryAction) + +Spec == Init /\ [][Next]_vars /\ Liveness + +============================================================================= diff --git a/src/mongo/tla_plus/MultiTenantMigrations/MCMultiTenantMigrations.cfg b/src/mongo/tla_plus/MultiTenantMigrations/MCMultiTenantMigrations.cfg new file mode 100644 index 00000000000..bdf6a17c98f --- /dev/null +++ b/src/mongo/tla_plus/MultiTenantMigrations/MCMultiTenantMigrations.cfg @@ -0,0 +1,60 @@ +\* Config file to run the TLC model-checker on MultiTenantMigrations.tla. +\* See MultiTenantMigrations.tla for instructions. + +CONSTANTS DonorStartMigrationRequest = DonorStartMigrationRequest +CONSTANTS DonorStartMigrationResponse = DonorStartMigrationResponse + +CONSTANTS RecipientSyncData1Request = RecipientSyncData1Request +CONSTANTS RecipientSyncData1Response = RecipientSyncData1Response + +CONSTANTS RecipientSyncData2Request = RecipientSyncData2Request +CONSTANTS RecipientSyncData2Response = RecipientSyncData2Response + +CONSTANTS DonorForgetMigrationRequest = DonorForgetMigrationRequest +CONSTANTS DonorForgetMigrationResponse = DonorForgetMigrationResponse + +CONSTANTS RecipientForgetMigrationRequest = RecipientForgetMigrationRequest +CONSTANTS RecipientForgetMigrationResponse = RecipientForgetMigrationResponse + +CONSTANTS RecUnstarted = RecUnstarted +CONSTANTS RecInconsistent = RecInconsistent +CONSTANTS RecConsistent = RecConsistent +CONSTANTS RecLagged = RecLagged +CONSTANTS RecReady = RecReady +CONSTANTS RecAborted = RecAborted +CONSTANTS RecTerminalState = RecTerminalState + +CONSTANTS DonUnstarted = DonUnstarted +CONSTANTS DonDataSync = DonDataSync +CONSTANTS DonBlocking = DonBlocking +CONSTANTS DonCommitted = DonCommitted +CONSTANTS DonAborted = DonAborted +CONSTANTS DonTerminalState = DonTerminalState + +CONSTANTS CloudUnknown = CloudUnknown +CONSTANTS CloudCommitted = CloudCommitted +CONSTANTS CloudAborted = CloudAborted +CONSTANTS CloudTerminalState = CloudTerminalState + +CONSTANTS MigrationNone = MigrationNone +CONSTANTS MigrationCommitted = MigrationCommitted +CONSTANTS MigrationAborted = MigrationAborted + +CONSTANTS SyncOK = SyncOK +CONSTANTS SyncAborted = SyncAborted + +CONSTANT MaxRequests = 8 + +INVARIANT StateMachinesConsistent + +PROPERTY MigrationEventuallyCompletes +PROPERTY MessageBagEventuallyEmpties +PROPERTY EachRequestHasAResponse + +\* Not configurable. +CONSTRAINT StateConstraint +SPECIFICATION Spec + +\* The spec can terminate without a deadlock. The liveness properties are present to ensure the +\* termination states are correct. +CHECK_DEADLOCK FALSE diff --git a/src/mongo/tla_plus/MultiTenantMigrations/MCMultiTenantMigrations.tla b/src/mongo/tla_plus/MultiTenantMigrations/MCMultiTenantMigrations.tla new file mode 100644 index 00000000000..08ac48de0b2 --- /dev/null +++ b/src/mongo/tla_plus/MultiTenantMigrations/MCMultiTenantMigrations.tla @@ -0,0 +1,15 @@ +---- MODULE MCMultiTenantMigrations ---- +\* This module defines MCMultiTenantMigrations.tla constants/constraints for model-checking. + +EXTENDS MultiTenantMigrations + +CONSTANT MaxRequests + +(**************************************************************************************************) +(* State Constraint. Used for model checking only. *) +(**************************************************************************************************) + +StateConstraint == + MaxRequests > totalRequests + +============================================================================= diff --git a/src/mongo/tla_plus/MultiTenantMigrations/MultiTenantMigrations.tla b/src/mongo/tla_plus/MultiTenantMigrations/MultiTenantMigrations.tla new file mode 100644 index 00000000000..fda343da0a7 --- /dev/null +++ b/src/mongo/tla_plus/MultiTenantMigrations/MultiTenantMigrations.tla @@ -0,0 +1,497 @@ +\* Copyright 2020 MongoDB, Inc. +\* +\* This work is licensed under: +\* - Creative Commons Attribution-3.0 United States License +\* http://creativecommons.org/licenses/by/3.0/us/ + +----------------------------- MODULE MultiTenantMigrations ----------------------------- +\* +\* A specification of MongoDB's multi-tenant migrations state-machine protocol. +\* +\* To run the model-checker, first edit the constants in MCMultiTenantMigrations.cfg if desired, +\* then: +\* cd src/mongo/tla_plus +\* ./model-check.sh MultiTenantMigrations +\* + +EXTENDS Integers, FiniteSets, Sequences, TLC + +\* Command requests and responses +CONSTANTS DonorStartMigrationRequest, DonorStartMigrationResponse +CONSTANTS RecipientSyncData1Request, RecipientSyncData1Response +CONSTANTS RecipientSyncData2Request, RecipientSyncData2Response +CONSTANTS DonorForgetMigrationRequest, DonorForgetMigrationResponse +CONSTANTS RecipientForgetMigrationRequest, RecipientForgetMigrationResponse + +\* recipient states +\* RecUnstarted -(syncData1)-> RecInconsistent -> RecConsistent -(syncData2)-> RecLagged -> RecReady +CONSTANTS RecUnstarted, RecInconsistent, RecConsistent, RecLagged, RecReady, RecAborted, + RecTerminalState + +\* donor states +\* DonUnstarted -(startMigration)-> DonDataSync -(syncData1Res) +\* -> DonBlocking -(syncData2Res)-> DonCommitted +CONSTANTS DonUnstarted, DonDataSync, DonBlocking, DonCommitted, DonAborted, DonTerminalState + +\* cloud state +CONSTANTS CloudUnknown, CloudCommitted, CloudAborted, CloudTerminalState + +\* Responses to DonorStartMigration request +CONSTANTS MigrationNone, MigrationCommitted, MigrationAborted + +\* Responses to RecipientSyncData1/2 requests +CONSTANTS SyncOK, SyncAborted + +(**************************************************************************************************) +(* Global variables *) +(**************************************************************************************************) + +VARIABLE messages +VARIABLE recipientState +VARIABLE donorState +VARIABLE cloudState +VARIABLE totalRequests +VARIABLE totalResponses +VARIABLE recipientAborted + +donorVars == <<donorState>> +recipientVars == <<recipientState, recipientAborted>> +cloudVars == <<cloudState>> +messageVars == <<messages, totalRequests, totalResponses>> +vars == <<donorVars, recipientVars, cloudVars, messageVars>> + +------------------------------------------------------------------------------------------- + +(**************************************************************************************************) +(* Network Helpers, adapted from https://github.com/ongardie/raft.tla/blob/master/raft.tla *) +(**************************************************************************************************) + +\* Helper for Send. Given a message m and bag of messages, return a new bag of messages with one +\* more m in it. +WithMessage(m, msgs) == + IF m \in DOMAIN msgs THEN + [msgs EXCEPT ![m] = msgs[m] + 1] + ELSE + msgs @@ (m :> 1) + +\* Helper for Discard and Reply. Given a message m and bag of messages, return a new bag of +\* messages with one less m in it. +WithoutMessage(m, msgs) == + IF m \in DOMAIN msgs THEN + IF msgs[m] = 1 THEN + \* Remove message m from the bag. + [n \in DOMAIN msgs \ {m} |-> msgs[n]] + ELSE + [msgs EXCEPT ![m] = msgs[m] - 1] + ELSE + msgs + +IsRequest(m) == + m.mType \in {DonorStartMigrationRequest, RecipientSyncData1Request, + RecipientSyncData2Request, DonorForgetMigrationRequest, + RecipientForgetMigrationRequest} + +IncTotalMessages(m) == + IF IsRequest(m) THEN + /\ totalRequests' = totalRequests + 1 + /\ UNCHANGED <<totalResponses>> + ELSE + /\ totalResponses' = totalResponses + 1 + /\ UNCHANGED <<totalRequests>> + +\* Add a message to the bag of messages. +Send(m) == + /\ messages' = WithMessage(m, messages) + /\ IncTotalMessages(m) + +\* Remove a message from the bag of messages. Used when a server is done processing a message. +Discard(m) == + /\ messages' = WithoutMessage(m, messages) + /\ UNCHANGED <<totalRequests, totalResponses>> + +\* Helper that both sends a message and discards a message. +SendAndDiscard(sendMessage, discardMessage) == + /\ messages' = WithoutMessage(discardMessage, WithMessage(sendMessage, messages)) + /\ IncTotalMessages(sendMessage) + +(**************************************************************************************************) +(* Request and response handlers *) +(**************************************************************************************************) + +\* Helper to create the donorStartMigration response based on the donor state. +DonorStartMigrationResponseGen == + CASE donorState = DonAborted -> + [mType |-> DonorStartMigrationResponse, + mOutcome |-> MigrationAborted] + [] donorState = DonCommitted -> + [mType |-> DonorStartMigrationResponse, + mOutcome |-> MigrationCommitted] + [] donorState \in {DonUnstarted, DonDataSync, DonBlocking, DonTerminalState} -> + [mType |-> DonorStartMigrationResponse, + mOutcome |-> MigrationNone] + +\* Donor +HandleDonorStartMigrationRequest(m) == + /\ m.mType = DonorStartMigrationRequest + \* If the donor is unstarted, it starts, otherwise nothing happens. Either way sends a response + \* to cloud. + /\ CASE donorState = DonUnstarted -> + /\ donorState' = DonDataSync + /\ messages' = WithoutMessage(m, + \* Send an immediate response to cloud. + WithMessage(DonorStartMigrationResponseGen, + \* Send a recipientSyncData1 request to the recipient. + WithMessage([mType |-> RecipientSyncData1Request], messages))) + /\ totalRequests' = totalRequests + 1 + /\ totalResponses' = totalResponses + 1 + [] donorState \in {DonDataSync, DonBlocking, DonCommitted, DonAborted, DonTerminalState} -> + /\ SendAndDiscard(DonorStartMigrationResponseGen, m) + /\ UNCHANGED <<donorVars>> + /\ UNCHANGED <<recipientVars, cloudVars>> + +\* Cloud +HandleDonorStartMigrationResponse(m) == + /\ m.mType = DonorStartMigrationResponse + \* Updates the cloud state to whatever the donor specifies, if specified. + /\ CASE m.mOutcome = MigrationNone -> + UNCHANGED <<cloudState>> + [] m.mOutcome = MigrationCommitted -> + cloudState' = CloudCommitted + [] m.mOutcome = MigrationAborted -> + cloudState' = CloudAborted + /\ Discard(m) + /\ UNCHANGED <<donorVars, recipientVars>> + +\* Recipient +HandleRecipientSyncData1Request(m) == + /\ m.mType = RecipientSyncData1Request + \* We don't want to handle this request being processed while inconsistent, since that would + \* require modeling request joining behavior, which is unnecessary complexity for the + \* purposes of this model. A recipientSyncData1 request being processed while in RecInconsistent + \* must be a duplicate message. + /\ recipientState \notin {RecInconsistent} + /\ CASE recipientState = RecUnstarted -> + \* Starts the migration. The recipient does not respond to the donor until it is + \* consistent. + /\ recipientState' = RecInconsistent + /\ Discard(m) + /\ UNCHANGED <<recipientAborted>> + [] recipientState = RecAborted -> + \* Sends a response to the donor that the migration aborted. + /\ SendAndDiscard([mType |-> RecipientSyncData1Response, + mSyncStatus |-> SyncAborted], m) + /\ UNCHANGED <<recipientVars>> + [] recipientState \in {RecConsistent, RecLagged, RecReady} -> + \* This is a duplicate message, resend the response we must have sent previously. + /\ SendAndDiscard([mType |-> RecipientSyncData1Response, + mSyncStatus |-> SyncOK], m) + /\ UNCHANGED <<recipientVars>> + [] recipientState = RecTerminalState -> + \* This migration has finished, which means the donor and cloud already have committed + \* or aborted. Send SyncOK, which will be ignored. + /\ SendAndDiscard([mType |-> RecipientSyncData1Response, + mSyncStatus |-> SyncOK], m) + /\ UNCHANGED <<recipientVars>> + /\ UNCHANGED <<donorVars, cloudVars>> + +\* Factored out of below to make nested Case statements clearer. +HandleRecipientSyncData1Response_SyncOK(m) == + CASE donorState = DonDataSync -> + \* Move the state machine to "blocking" and send recipientSyncData2. + /\ donorState' = DonBlocking + /\ SendAndDiscard([mType |-> RecipientSyncData2Request], m) + [] donorState \in {DonBlocking, DonCommitted, DonAborted, DonTerminalState} -> + \* Just ignore this message, since we're past this step in the protocol + \* and this is a delayed message. + /\ Discard(m) + /\ UNCHANGED <<donorState>> + +\* Factored out of below to make nested Case statements clearer. +HandleRecipientSyncData1Response_SyncAborted(m) == + /\ CASE donorState = DonDataSync -> + \* The recipient failed the migration, so abort. + \* We can only get this response in DonDataSync or DonBlocking. + \* DataSync is the common case, but Blocking can happen when there are two + \* recipientSyncData1 responses and the "OK" one is processed first. + donorState' = DonAborted + [] donorState \in {DonBlocking, DonAborted, DonTerminalState} -> + \* If the migration is in DonBlocking, we ignore the response. The migration will be + \* aborted on the donor when it receives the recipientSyncData2 response. + \* If the migration is already aborted or finished, do nothing. + UNCHANGED <<donorState>> + /\ Discard(m) + +\* Donor +HandleRecipientSyncData1Response(m) == + /\ m.mType = RecipientSyncData1Response + /\ CASE m.mSyncStatus = SyncOK -> + HandleRecipientSyncData1Response_SyncOK(m) + [] m.mSyncStatus = SyncAborted -> + HandleRecipientSyncData1Response_SyncAborted(m) + /\ UNCHANGED <<recipientVars, cloudVars>> + +\* Recipient +HandleRecipientSyncData2Request(m) == + /\ m.mType = RecipientSyncData2Request + \* We don't want to handle this request being processed while lagged, since that would + \* require modeling request joining behavior, which is unnecessary complexity for the + \* purposes of this model. A recipientSyncData2 request being processed while in RecLagged + \* must be a duplicate message. + /\ recipientState \notin {RecLagged} + /\ CASE recipientState = RecConsistent -> + \* Move the state machine to "lagged", since the recipient now knows the ending + \* timestamp. The recipient does not respond to the donor until it has caught up. + /\ recipientState' = RecLagged + /\ Discard(m) + /\ UNCHANGED <<recipientAborted>> + [] recipientState = RecAborted -> + \* Sends a response to the donor that the migration aborted. + /\ SendAndDiscard([mType |-> RecipientSyncData2Response, + mSyncStatus |-> SyncAborted], m) + /\ UNCHANGED <<recipientVars>> + [] recipientState = RecReady -> + \* This is a duplicate message, resend the response we must have sent previously. + /\ SendAndDiscard([mType |-> RecipientSyncData2Response, + mSyncStatus |-> SyncOK], m) + /\ UNCHANGED <<recipientVars>> + [] recipientState = RecTerminalState -> + \* This migration is finished, which means the donor and cloud already have committed + \* or aborted. Send SyncOK, which will be ignored. + /\ SendAndDiscard([mType |-> RecipientSyncData2Response, + mSyncStatus |-> SyncOK], m) + /\ UNCHANGED <<recipientVars>> + /\ UNCHANGED <<donorVars, cloudVars>> + +\* Factored out of below to make nested Case statements clearer. +HandleRecipientSyncData2Response_SyncOK == + CASE donorState = DonBlocking -> + \* The recipient is done! + donorState' = DonCommitted + [] donorState \in {DonCommitted, DonAborted, DonTerminalState} -> + \* Just ignore this message, since we're past this step in the protocol + \* and this is a delayed message. + UNCHANGED <<donorState>> + +\* Factored out of below to make nested Case statements clearer. +HandleRecipientSyncData2Response_SyncAborted == + CASE donorState = DonBlocking -> + \* The recipient failed the migration, so abort. + donorState' = DonAborted + [] donorState \in {DonAborted, DonTerminalState} -> + \* If the migration is already aborted or finished, do nothing. + UNCHANGED <<donorState>> +\* Donor +HandleRecipientSyncData2Response(m) == + /\ m.mType = RecipientSyncData2Response + /\ CASE m.mSyncStatus = SyncOK -> + HandleRecipientSyncData2Response_SyncOK + [] m.mSyncStatus = SyncAborted -> + HandleRecipientSyncData2Response_SyncAborted + /\ Discard(m) + /\ UNCHANGED <<recipientVars, cloudVars>> + +\* Donor +HandleDonorForgetMigrationRequest(m) == + /\ m.mType = DonorForgetMigrationRequest + \* Don't mark donor finished until recipient is. + /\ SendAndDiscard([mType |-> RecipientForgetMigrationRequest], m) + /\ UNCHANGED <<donorVars, recipientVars, cloudVars>> + +\* Cloud +HandleDonorForgetMigrationResponse(m) == + /\ m.mType = DonorForgetMigrationResponse + \* The donor and recipient unconditionally finish the migration, so cloud can too. + /\ cloudState' = CloudTerminalState + /\ Discard(m) + /\ UNCHANGED <<donorVars, recipientVars>> + +\* Recipient +HandleRecipientForgetMigrationRequest(m) == + /\ m.mType = RecipientForgetMigrationRequest + \* Finish the migration no matter what, and tell the donor. + /\ recipientState' = RecTerminalState + /\ SendAndDiscard([mType |-> RecipientForgetMigrationResponse], m) + /\ UNCHANGED <<donorVars, cloudVars, recipientAborted>> + +\* Donor +HandleRecipientForgetMigrationResponse(m) == + /\ m.mType = RecipientForgetMigrationResponse + \* The recipient has finished the migration, so now the donor can finish the migration and + \* respond to cloud that it has finished the migration. + /\ donorState' = DonTerminalState + /\ SendAndDiscard([mType |-> DonorForgetMigrationResponse], m) + /\ UNCHANGED <<recipientVars, cloudVars>> + + +(******************************************************************************) +(* [ACTION] *) +(******************************************************************************) + +\* Models a retry of recipientSyncData1. +DonorSendsRecipientSyncData1Request == + /\ donorState = DonDataSync + /\ Send([mType |-> RecipientSyncData1Request]) + /\ UNCHANGED <<donorVars, recipientVars, cloudVars>> + +\* Models a retry of recipientSyncData2. +DonorSendsRecipientSyncData2Request == + /\ donorState = DonBlocking + /\ Send([mType |-> RecipientSyncData2Request]) + /\ UNCHANGED <<donorVars, recipientVars, cloudVars>> + +CloudSendsDonorStartMigrationRequest == + /\ cloudState = CloudUnknown + /\ Send([mType |-> DonorStartMigrationRequest]) + /\ UNCHANGED <<donorVars, recipientVars, cloudVars>> + +CloudSendsDonorForgetMigrationRequest == + /\ cloudState \in {CloudAborted, CloudCommitted} + /\ Send([mType |-> DonorForgetMigrationRequest]) + /\ UNCHANGED <<donorVars, recipientVars, cloudVars>> + +RecipientBecomesConsistent == + /\ recipientState = RecInconsistent + /\ recipientState' = RecConsistent + /\ Send([mType |-> RecipientSyncData1Response, + mSyncStatus |-> SyncOK]) + /\ UNCHANGED <<donorVars, cloudVars, recipientAborted>> + +RecipientCatchesUp == + /\ recipientState = RecLagged + /\ recipientState' = RecReady + /\ Send([mType |-> RecipientSyncData2Response, + mSyncStatus |-> SyncOK]) + /\ UNCHANGED <<donorVars, cloudVars, recipientAborted>> + +RecipientFailsMigration == + \* Recipient can't fail after it's ready, finished, or already aborted. + /\ recipientState \notin {RecReady, RecAborted, RecTerminalState} + /\ recipientState' = RecAborted + /\ recipientAborted' = TRUE + /\ CASE recipientState = RecInconsistent -> + \* When "inconsistent" the recipient has an active recipientSyncData1 request. + Send([mType |-> RecipientSyncData1Response, + mSyncStatus |-> SyncAborted]) + [] recipientState = RecLagged -> + \* When "lagged" the recipient has an active recipientSyncData2 request. + Send([mType |-> RecipientSyncData2Response, + mSyncStatus |-> SyncAborted]) + [] recipientState \in {RecUnstarted, RecConsistent} -> + \* Nothing happens, besides setting the state to aborted. On transitioning to + \* "consistent" the recipient already sent a response to the donor and should not + \* send a conflicting response. + UNCHANGED <<messageVars>> + /\ UNCHANGED <<cloudVars, donorVars>> + +(**************************************************************************************************) +(* Correctness Properties *) +(**************************************************************************************************) + +StateMachinesInconsistent == + \/ /\ cloudState = CloudCommitted + /\ \/ recipientState \notin {RecReady, RecTerminalState} + \/ recipientAborted = TRUE + \/ donorState \notin {DonCommitted, DonTerminalState} + \/ /\ donorState = DonCommitted + /\ \/ recipientState \notin {RecReady, RecTerminalState} + \/ recipientAborted = TRUE + +StateMachinesConsistent == ~StateMachinesInconsistent + +(**************************************************************************************************) +(* Liveness properties *) +(**************************************************************************************************) + +\* Checks that the state machines eventually converge on terminating states. +MigrationEventuallyCompletes == + <> /\ recipientState = RecTerminalState + /\ donorState = DonTerminalState + /\ cloudState = CloudTerminalState + +\* Checks that if the bag fills up, it eventually empties. +MessageBagEventuallyEmpties == + Cardinality(DOMAIN messages) > 0 ~> Cardinality(DOMAIN messages) = 0 + +\* Checks that the number of totalRequests eventually equals the number of totalResponses, +\* and stays that way. This will always be right before termination. +EachRequestHasAResponse == + <>[] (totalRequests = totalResponses) + +(**************************************************************************************************) +(* Spec definition *) +(**************************************************************************************************) +Init == + /\ messages = [m \in {} |-> 0] + /\ donorState = DonUnstarted + /\ recipientState = RecUnstarted + /\ cloudState = CloudUnknown + /\ totalRequests = 0 + /\ totalResponses = 0 + /\ recipientAborted = FALSE + +RecipientBecomesConsistentAction == RecipientBecomesConsistent +RecipientCatchesUpAction == RecipientCatchesUp +RecipientFailsMigrationAction == RecipientFailsMigration +CloudSendsDonorStartMigrationRequestAction == CloudSendsDonorStartMigrationRequest +CloudSendsDonorForgetMigrationRequestAction == CloudSendsDonorForgetMigrationRequest +DonorSendsRecipientSyncData1RequestAction == DonorSendsRecipientSyncData1Request +DonorSendsRecipientSyncData2RequestAction == DonorSendsRecipientSyncData2Request + +ReceiveDonorStartMigrationRequestAction == \E m \in DOMAIN messages : + HandleDonorStartMigrationRequest(m) +ReceiveDonorStartMigrationResponseAction == \E m \in DOMAIN messages : + HandleDonorStartMigrationResponse(m) +ReceiveRecipientSyncData1RequestAction == \E m \in DOMAIN messages : + HandleRecipientSyncData1Request(m) +ReceiveRecipientSyncData1ResponseAction == \E m \in DOMAIN messages : + HandleRecipientSyncData1Response(m) +ReceiveRecipientSyncData2RequestAction == \E m \in DOMAIN messages : + HandleRecipientSyncData2Request(m) +ReceiveRecipientSyncData2ResponseAction == \E m \in DOMAIN messages : + HandleRecipientSyncData2Response(m) +ReceiveDonorForgetMigrationRequestAction == \E m \in DOMAIN messages : + HandleDonorForgetMigrationRequest(m) +ReceiveDonorForgetMigrationResponseAction == \E m \in DOMAIN messages : + HandleDonorForgetMigrationResponse(m) +ReceiveRecipientForgetMigrationRequestAction == \E m \in DOMAIN messages : + HandleRecipientForgetMigrationRequest(m) +ReceiveRecipientForgetMigrationResponseAction == \E m \in DOMAIN messages : + HandleRecipientForgetMigrationResponse(m) + +Next == + \/ RecipientBecomesConsistentAction + \/ RecipientCatchesUpAction + \/ RecipientFailsMigrationAction + \/ CloudSendsDonorStartMigrationRequestAction + \/ CloudSendsDonorForgetMigrationRequestAction + \/ DonorSendsRecipientSyncData1RequestAction + \/ DonorSendsRecipientSyncData2RequestAction + \/ ReceiveDonorStartMigrationRequestAction + \/ ReceiveDonorStartMigrationResponseAction + \/ ReceiveRecipientSyncData1RequestAction + \/ ReceiveRecipientSyncData1ResponseAction + \/ ReceiveRecipientSyncData2RequestAction + \/ ReceiveRecipientSyncData2ResponseAction + \/ ReceiveDonorForgetMigrationRequestAction + \/ ReceiveDonorForgetMigrationResponseAction + \/ ReceiveRecipientForgetMigrationRequestAction + \/ ReceiveRecipientForgetMigrationResponseAction + +\* Add fairness constraints so the above liveness properties are met. +Liveness == + /\ WF_vars(ReceiveDonorStartMigrationRequestAction) + /\ WF_vars(ReceiveDonorStartMigrationResponseAction) + /\ WF_vars(ReceiveRecipientSyncData1RequestAction) + /\ WF_vars(ReceiveRecipientSyncData1ResponseAction) + /\ WF_vars(ReceiveRecipientSyncData2RequestAction) + /\ WF_vars(ReceiveRecipientSyncData2ResponseAction) + /\ WF_vars(ReceiveDonorForgetMigrationRequestAction) + /\ WF_vars(ReceiveDonorForgetMigrationResponseAction) + /\ WF_vars(ReceiveRecipientForgetMigrationRequestAction) + /\ WF_vars(ReceiveRecipientForgetMigrationResponseAction) + /\ WF_vars(CloudSendsDonorStartMigrationRequestAction) + /\ WF_vars(CloudSendsDonorForgetMigrationRequestAction) + +Spec == Init /\ [][Next]_vars /\ Liveness + +============================================================================= diff --git a/src/mongo/tla_plus/README.md b/src/mongo/tla_plus/README.md new file mode 100644 index 00000000000..04b22f406d3 --- /dev/null +++ b/src/mongo/tla_plus/README.md @@ -0,0 +1,23 @@ +Replication TLA+ Specifications +=============================== + +These are formal specifications for exploring possible replication protocols. +Some are experiments, some reflect MongoDB's actual implementation. See the +comments in each spec for details. + +Some specs are intended for model-checking. They are in subdirectories like: + +``` +SpecName/ + SpecName.tla specification + MCSpecName.tla additional operators for model-checking + MCSpecName.cfg configuration for model-checking +``` + +Run the model-checker on a spec thus: + +``` +./model-check.sh SpecName +``` + +There may be additional instructions in the spec or config file. diff --git a/src/mongo/tla_plus/RaftMongo/MCRaftMongo.cfg b/src/mongo/tla_plus/RaftMongo/MCRaftMongo.cfg new file mode 100644 index 00000000000..3724efb7f0b --- /dev/null +++ b/src/mongo/tla_plus/RaftMongo/MCRaftMongo.cfg @@ -0,0 +1,30 @@ +\* Config file to run the TLC model-checker on RaftMongo.tla. +\* See RaftMongo.tla for instructions. + +\* The maximum number of oplog entries that can be created on the primary in one +\* action. For model-checking, this can be 1 or a small number. +CONSTANT MaxClientWriteSize = 2 + +\* The number of election terms to simulate during model-checking. +CONSTANT MaxTerm = 3 + +\* The longest oplog any server can reach during model-checking. +CONSTANT MaxLogLen = 3 + +\* The set of server IDs. Adjust to test different replica set sizes. +CONSTANT Server = {1, 2, 3} + +INVARIANT NoTwoPrimariesInSameTerm + +\* NeverRollbackCommitted and NeverRollbackBeforeCommitPoint can be violated, +\* although it's not ultimately a safety issue: SERVER-39626. The issue +\* requires at least 5 servers, 3 terms, and oplogs of length 4+, which are +\* larger limits than we can easily model-check. +INVARIANT NeverRollbackCommitted +INVARIANT NeverRollbackBeforeCommitPoint + +PROPERTY CommitPointEventuallyPropagates + +\* Not configurable. +CONSTRAINT StateConstraint +SPECIFICATION Spec diff --git a/src/mongo/tla_plus/RaftMongo/MCRaftMongo.tla b/src/mongo/tla_plus/RaftMongo/MCRaftMongo.tla new file mode 100644 index 00000000000..057e3dd7a91 --- /dev/null +++ b/src/mongo/tla_plus/RaftMongo/MCRaftMongo.tla @@ -0,0 +1,13 @@ +---- MODULE MCRaftMongo ---- +\* This module defines RaftMongo.tla constants/constraints for model-checking. +\* See RaftMongo.tla for instructions. + +EXTENDS RaftMongo + +CONSTANT MaxTerm +CONSTANT MaxLogLen + +StateConstraint == + /\ GlobalCurrentTerm <= MaxTerm + /\ \forall i \in Server: Len(log[i]) <= MaxLogLen +============================================================================= diff --git a/src/mongo/tla_plus/RaftMongo/RaftMongo.tla b/src/mongo/tla_plus/RaftMongo/RaftMongo.tla new file mode 100644 index 00000000000..bb1ce3f6684 --- /dev/null +++ b/src/mongo/tla_plus/RaftMongo/RaftMongo.tla @@ -0,0 +1,331 @@ +\* Copyright 2019 MongoDB, Inc. +\* +\* This work is licensed under: +\* - Creative Commons Attribution-3.0 United States License +\* http://creativecommons.org/licenses/by/3.0/us/ + +--------------------------------- MODULE RaftMongo --------------------------------- +\* This is the formal specification for the Raft consensus algorithm in MongoDB. +\* +\* To run the model-checker, first edit the constants in MCRaftMongo.cfg if desired, then: +\* cd src/mongo/tla_plus +\* ./model-check.sh RaftMongo + +EXTENDS Integers, FiniteSets, Sequences + +\* The set of server IDs. +CONSTANT Server + +\* The maximum number of oplog entries that can be created on the primary in one +\* action. For model-checking, this can be 1 or a small number. +CONSTANT MaxClientWriteSize + +\* The set of log entries that have been acknowledged as committed, both "immediately committed" and +\* "prefix committed". +VARIABLE committedEntries + +---- +\* The following variables are all per server (functions with domain Server). + +\* The server's term number. +VARIABLE currentTerm + +\* The server's state ("Follower" or "Leader"). +VARIABLE state + +\* The commit point learned by each server. +VARIABLE commitPoint + +electionVars == <<currentTerm, state>> +serverVars == <<electionVars, commitPoint>> + +\* A Sequence of log entries. The index into this sequence is the index of the +\* log entry. Unfortunately, the Sequence module defines Head(s) as the entry +\* with index 1, so be careful not to use that! +VARIABLE log +logVars == <<committedEntries, log>> + +\* End of per server variables. +---- + +\* All variables; used for stuttering (asserting state hasn't changed). +vars == <<serverVars, logVars>> + +---- +\* Helpers + +IsMajority(servers) == Cardinality(servers) * 2 > Cardinality(Server) +GetTerm(xlog, index) == IF index = 0 THEN 0 ELSE xlog[index].term +LogTerm(i, index) == GetTerm(log[i], index) +LastTerm(xlog) == GetTerm(xlog, Len(xlog)) +Leaders == {s \in Server : state[s] = "Leader"} +Range(f) == {f[x] : x \in DOMAIN f} + +\* Return the maximum value from a set, or undefined if the set is empty. +Max(s) == CHOOSE x \in s : \A y \in s : x >= y + +GlobalCurrentTerm == Max(Range(currentTerm)) + +\* Server i is allowed to sync from server j. +CanSyncFrom(i, j) == + /\ Len(log[i]) < Len(log[j]) + /\ LastTerm(log[i]) = LogTerm(j, Len(log[i])) + +\* Server "me" is ahead of or caught up to server j. +NotBehind(me, j) == \/ LastTerm(log[me]) > LastTerm(log[j]) + \/ /\ LastTerm(log[me]) = LastTerm(log[j]) + /\ Len(log[me]) >= Len(log[j]) + +\* The set of nodes that has log[me][logIndex] in their oplog +Agree(me, logIndex) == + { node \in Server : + /\ Len(log[node]) >= logIndex + /\ LogTerm(me, logIndex) = LogTerm(node, logIndex) } + +\* Return whether Node i can learn the commit point from Node j. +CommitPointLessThan(i, j) == + \/ commitPoint[i].term < commitPoint[j].term + \/ /\ commitPoint[i].term = commitPoint[j].term + /\ commitPoint[i].index < commitPoint[j].index + +\* Is it possible for node i's log to roll back based on j's log? If true, it +\* implies that i's log should remove entries to become a prefix of j's. +CanRollbackOplog(i, j) == + /\ Len(log[i]) > 0 + /\ \* The log with later term is more up-to-date + LastTerm(log[i]) < LastTerm(log[j]) + /\ + \/ Len(log[i]) > Len(log[j]) + \/ /\ Len(log[i]) <= Len(log[j]) + /\ LastTerm(log[i]) /= LogTerm(j, Len(log[i])) + +---- +\* Define initial values for all variables + +InitServerVars == /\ currentTerm = [i \in Server |-> 0] + /\ state = [i \in Server |-> "Follower"] + /\ commitPoint = [i \in Server |-> [term |-> 0, index |-> 0]] +InitLogVars == /\ log = [i \in Server |-> << >>] + /\ committedEntries = {} +Init == /\ InitServerVars + /\ InitLogVars + +---- +\* Message handlers +\* i = recipient, j = sender + +\* Receive one or more oplog entries from j. +AppendOplog(i, j) == + /\ CanSyncFrom(i, j) + /\ state[i] = "Follower" + /\ \E lastAppended \in (Len(log[i]) + 1)..Len(log[j]): + LET appendedEntries == SubSeq(log[j], Len(log[i]) + 1, lastAppended) + IN log' = [log EXCEPT ![i] = log[i] \o appendedEntries] + /\ UNCHANGED <<committedEntries, serverVars>> + +\* Node i learns the commit point from j via heartbeat. +LearnCommitPoint(i, j) == + /\ CommitPointLessThan(i, j) + /\ commitPoint' = [commitPoint EXCEPT ![i] = commitPoint[j]] + /\ UNCHANGED <<committedEntries, electionVars, logVars>> + +RollbackOplog(i, j) == + /\ CanRollbackOplog(i, j) + \* Rollback 1 oplog entry + /\ LET new == [index2 \in 1..(Len(log[i]) - 1) |-> log[i][index2]] + IN log' = [log EXCEPT ![i] = new] + /\ UNCHANGED <<serverVars, committedEntries>> + +\* ACTION +\* Node i is elected by a majority, and nodes that voted for it can't still be primary. +\* A stale primary might persist among the minority that didn't vote for it. +BecomePrimaryByMagic(i, ayeVoters) == + /\ \A j \in ayeVoters : /\ NotBehind(i, j) + /\ currentTerm[j] <= currentTerm[i] + /\ IsMajority(ayeVoters) + /\ state' = [index \in Server |-> IF index \notin ayeVoters + THEN state[index] + ELSE IF index = i THEN "Leader" ELSE "Follower"] + /\ currentTerm' = [index \in Server |-> IF index \in (ayeVoters \union {i}) + THEN currentTerm[i] + 1 + ELSE currentTerm[index]] + /\ UNCHANGED <<committedEntries, commitPoint, logVars>> + + +\* ACTION +\* Node i is leader and steps down for any reason. +Stepdown(i) == + /\ state[i] = "Leader" + /\ state' = [state EXCEPT ![i] = "Follower"] + /\ UNCHANGED <<committedEntries, currentTerm, commitPoint, logVars>> + +\* ACTION +\* Leader i receives a client request to add one or more entries to the log. +\* There can be multiple leaders, each in a different term. A leader writes +\* an oplog entry in its own term. +ClientWrite(i) == + /\ state[i] = "Leader" + /\ \E numEntries \in 1..MaxClientWriteSize : + LET entry == [term |-> currentTerm[i]] + newEntries == [ j \in 1..numEntries |-> entry ] + newLog == log[i] \o newEntries + IN log' = [log EXCEPT ![i] = newLog] + /\ UNCHANGED <<committedEntries, serverVars>> + +UpdateTermThroughHeartbeat(i, j) == + /\ currentTerm[j] > currentTerm[i] + /\ currentTerm' = [currentTerm EXCEPT ![i] = currentTerm[j]] + /\ state' = [state EXCEPT ![i] = "Follower"] + /\ UNCHANGED <<commitPoint, logVars>> + +\* ACTION +AdvanceCommitPoint == + \E leader \in Leaders : + \E acknowledgers \in SUBSET Server : + \* New commitPoint is any committed log index after current commitPoint + \E committedIndex \in (commitPoint[leader].index + 1)..Len(log[leader]) : + /\ IsMajority(acknowledgers) + /\ acknowledgers \subseteq Agree(leader, committedIndex) + \* New commitPoint is an entry written by this leader. + /\ LogTerm(leader, committedIndex) = currentTerm[leader] + \* If an acknowledger has a higher term, the leader would step down. + /\ \A j \in acknowledgers : currentTerm[j] <= currentTerm[leader] + /\ LET newCommitPoint == [ + term |-> LogTerm(leader, committedIndex), + index |-> committedIndex + ] + IN /\ commitPoint' = [commitPoint EXCEPT ![leader] = newCommitPoint] + /\ committedEntries' = committedEntries \union {[ + term |-> LogTerm(leader, i), + index |-> i + ] : i \in commitPoint[leader].index + 1..committedIndex} + /\ UNCHANGED <<electionVars, log>> + +\* ACTION +\* Node i learns the commit point from j via heartbeat with term check +LearnCommitPointWithTermCheck(i, j) == + /\ LastTerm(log[i]) = commitPoint[j].term + /\ LearnCommitPoint(i, j) + +\* ACTION +\* Node i learns the commit point from j while tailing j's oplog +LearnCommitPointFromSyncSourceNeverBeyondLastApplied(i, j) == + \* j is a potential sync source, either ahead of or equal to i's oplog + /\ \/ CanSyncFrom(i, j) + \/ log[i] = log[j] + /\ CommitPointLessThan(i, j) + \* Never beyond last applied + /\ LET myCommitPoint == + \* If j's term is less than or equal to i's, commit point can be ahead. + IF commitPoint[j].term <= LastTerm(log[i]) + THEN commitPoint[j] + ELSE [term |-> LastTerm(log[i]), index |-> Len(log[i])] + IN commitPoint' = [commitPoint EXCEPT ![i] = myCommitPoint] + /\ UNCHANGED <<committedEntries, electionVars, logVars>> + +---- +AppendOplogAction == + \E i,j \in Server : AppendOplog(i, j) + +RollbackOplogAction == + \E i,j \in Server : RollbackOplog(i, j) + +BecomePrimaryByMagicAction == + \E i \in Server : \E ayeVoters \in SUBSET(Server) : BecomePrimaryByMagic(i, ayeVoters) + +StepdownAction == + \E i \in Server : Stepdown(i) + +ClientWriteAction == + \E i \in Server : ClientWrite(i) + +UpdateTermThroughHeartbeatAction == + \E i, j \in Server : UpdateTermThroughHeartbeat(i, j) + +LearnCommitPointWithTermCheckAction == + \E i, j \in Server : LearnCommitPointWithTermCheck(i, j) + +LearnCommitPointFromSyncSourceNeverBeyondLastAppliedAction == + \E i, j \in Server : LearnCommitPointFromSyncSourceNeverBeyondLastApplied(i, j) + +---- + +\* Defines how the variables may transition. +\* +\* MongoDB's commit point learning protocol has evolved as we discovered +\* protocol bugs, see: +\* +\* https://conf.tlapl.us/07_-_TLAConf19_-_William_Schultz_-_Fixing_a_MongoDB_Replication_Protocol_Bug_with_TLA.pdf +\* +Next == + \* --- Replication protocol + \/ AppendOplogAction + \/ RollbackOplogAction + \/ BecomePrimaryByMagicAction + \/ StepdownAction + \/ ClientWriteAction + \/ UpdateTermThroughHeartbeatAction + \* + \* --- Commit point learning protocol + \/ AdvanceCommitPoint + \/ LearnCommitPointWithTermCheckAction + \/ LearnCommitPointFromSyncSourceNeverBeyondLastAppliedAction + +SpecBehavior == Init /\ [][Next]_vars + +Liveness == + /\ SF_vars(AppendOplogAction) + /\ SF_vars(RollbackOplogAction) + \* A new primary should eventually write one entry. + /\ WF_vars(\E i \in Server : LastTerm(log[i]) # GlobalCurrentTerm /\ ClientWrite(i)) + \* + /\ WF_vars(AdvanceCommitPoint) + /\ SF_vars(LearnCommitPointWithTermCheckAction) + /\ SF_vars(LearnCommitPointFromSyncSourceNeverBeyondLastAppliedAction) + +\* The specification must start with the initial state and transition according +\* to Next. +Spec == SpecBehavior /\ Liveness + +---- + +\* Properties to check + +TwoPrimariesInSameTerm == + \E i, j \in Server : + /\ i # j + /\ currentTerm[i] = currentTerm[j] + /\ state[i] = "Leader" + /\ state[j] = "Leader" + +NoTwoPrimariesInSameTerm == ~TwoPrimariesInSameTerm + +RollbackCommitted(i) == + /\ [term |-> LastTerm(log[i]), index |-> Len(log[i])] \in committedEntries + /\ \E j \in Server: CanRollbackOplog(i, j) + +NeverRollbackCommitted == + \A i \in Server: ~RollbackCommitted(i) + +RollbackBeforeCommitPoint(i) == + /\ \E j \in Server: + /\ CanRollbackOplog(i, j) + /\ \/ LastTerm(log[i]) < commitPoint[i].term + \/ /\ LastTerm(log[i]) = commitPoint[i].term + /\ Len(log[i]) <= commitPoint[i].index + +NeverRollbackBeforeCommitPoint == \A i \in Server: ~RollbackBeforeCommitPoint(i) + +\* Liveness check + +\* This isn't accurate for any infinite behavior specified by Spec, but it's fine +\* for any finite behavior with the liveness we can check with the model checker. +\* This is to check at any time, if two nodes' commit points are not the same, they +\* will be the same eventually. +\* This is checked after all possible rollback is done. +CommitPointEventuallyPropagates == + /\ \A i, j \in Server: + [](commitPoint[i] # commitPoint[j] ~> + <>(~ENABLED RollbackOplogAction => commitPoint[i] = commitPoint[j])) + +=============================================================================== diff --git a/src/mongo/tla_plus/RaftMongoWithRaftReconfig.tla b/src/mongo/tla_plus/RaftMongoWithRaftReconfig.tla new file mode 100644 index 00000000000..aacb2903a1b --- /dev/null +++ b/src/mongo/tla_plus/RaftMongoWithRaftReconfig.tla @@ -0,0 +1,263 @@ +\* Copyright 2019 MongoDB, Inc. +\* +\* This work is licensed under: +\* - Creative Commons Attribution-3.0 United States License +\* http://creativecommons.org/licenses/by/3.0/us/ + +--------------------------------- MODULE RaftMongoWithRaftReconfig -------------------------------- +\* This is the formal specification for the Raft consensus algorithm in MongoDB. +\* It allows reconfig using the protocol for single server membership changes described in Raft. +\* Note that we did not choose to implement the protocol for single server membership changes +\* described in Raft. This specification was for exploratory purposes only. + +EXTENDS Naturals, FiniteSets, Sequences, TLC + +\* The set of server IDs +CONSTANTS Server + +\* Server states. +\* Candidate is not used, but this is fine. +CONSTANTS Follower, Candidate, Leader + +\* A reserved value. +CONSTANTS Nil + +---- +\* Global variables + +\* Servers in a given config version. +\* e.g. << {S1, S2}, {S1, S2, S3} >> +VARIABLE configs + +\* The set of log entries that have been acknowledged as committed, i.e. +\* "immediately committed" entries. It does not include "prefix committed" +\* entries, which are allowed to roll back on minority nodes. +VARIABLE committedEntries + +---- +\* The following variables are all per server (functions with domain Server). + +\* The server's term number. +VARIABLE currentTerm + +\* The server's state (Follower, Candidate, or Leader). +VARIABLE state + +serverVars == <<currentTerm, state>> + +\* A Sequence of log entries. The index into this sequence is the index of the +\* log entry. Unfortunately, the Sequence module defines Head(s) as the entry +\* with index 1, so be careful not to use that! +VARIABLE log +logVars == <<log, committedEntries>> + +\* End of per server variables. +---- + +\* All variables; used for stuttering (asserting state hasn't changed). +vars == <<serverVars, logVars, configs>> + +---- +\* Helpers + +\* The term of the last entry in a log, or 0 if the log is empty. +GetTerm(xlog, index) == IF index = 0 THEN 0 ELSE xlog[index].term +LogTerm(i, index) == GetTerm(log[i], index) +LastTerm(xlog) == GetTerm(xlog, Len(xlog)) + +\* Return the minimum value from a set, or undefined if the set is empty. +Min(s) == CHOOSE x \in s : \A y \in s : x <= y +\* Return the maximum value from a set, or undefined if the set is empty. +Max(s) == CHOOSE x \in s : \A y \in s : x >= y + +\* The config version in the node's last entry. +GetConfigVersion(i) == log[i][Len(log[i])].configVersion + +\* Gets the node's first entry with a given config version. +GetConfigEntry(i, configVersion) == LET configEntries == {index \in 1..Len(log[i]) : + log[i][index].configVersion = configVersion} + IN Min(configEntries) + +\* The servers that are in the same config as i. +ServerViewOn(i) == configs[GetConfigVersion(i)] + +\* The set of all quorums. This just calculates simple majorities, but the only +\* important property is that every quorum overlaps with every other. +Quorum(me) == {sub \in SUBSET(ServerViewOn(me)) : Cardinality(sub) * 2 > Cardinality(ServerViewOn(me))} + +---- +\* Define initial values for all variables +InitServerVars == /\ currentTerm = [i \in Server |-> 0] + /\ state = [i \in Server |-> Follower] +InitLogVars == /\ log = [i \in Server |-> << [term |-> 0, configVersion |-> 1] >>] + /\ committedEntries = {[term |-> 0, index |-> 1]} +InitConfigs == configs = << Server >> +Init == /\ InitServerVars + /\ InitLogVars + /\ InitConfigs + +---- +\* Message handlers +\* i = recipient, j = sender, m = message + +AppendOplog(i, j) == + /\ state[i] = Follower \* Disable primary catchup and draining + /\ j \in ServerViewOn(i) \* j is in the config of i. + /\ Len(log[i]) < Len(log[j]) + /\ LastTerm(log[i]) = LogTerm(j, Len(log[i])) + /\ log' = [log EXCEPT ![i] = Append(log[i], log[j][Len(log[i]) + 1])] + /\ UNCHANGED <<serverVars, committedEntries, configs>> + +CanRollbackOplog(i, j) == + /\ j \in ServerViewOn(i) \* j is in the config of i. + /\ Len(log[i]) > 0 + /\ \* The log with later term is more up-to-date + LastTerm(log[i]) < LastTerm(log[j]) + /\ + \/ Len(log[i]) > Len(log[j]) + \* There seems no short-cut of OR clauses, so I have to specify the negative case + \/ /\ Len(log[i]) <= Len(log[j]) + /\ LastTerm(log[i]) /= LogTerm(j, Len(log[i])) + +RollbackOplog(i, j) == + /\ CanRollbackOplog(i, j) + \* Rollback 1 oplog entry + /\ LET new == [index2 \in 1..(Len(log[i]) - 1) |-> log[i][index2]] + IN log' = [log EXCEPT ![i] = new] + /\ UNCHANGED <<serverVars, committedEntries, configs>> + +\* The set of nodes in my config that has log[me][logIndex] in their oplog +Agree(me, logIndex) == + { node \in ServerViewOn(me) : + /\ Len(log[node]) >= logIndex + /\ LogTerm(me, logIndex) = LogTerm(node, logIndex) } + +NotBehind(me, j) == \/ LastTerm(log[me]) > LastTerm(log[j]) + \/ /\ LastTerm(log[me]) = LastTerm(log[j]) + /\ Len(log[me]) >= Len(log[j]) + +\* ACTION +\* i = the new primary node. +BecomePrimaryByMagic(i, ayeVoters) == + /\ \A j \in ayeVoters : /\ i \in ServerViewOn(j) + /\ NotBehind(i, j) + /\ currentTerm[j] <= currentTerm[i] + /\ ayeVoters \in Quorum(i) + /\ state' = [index \in Server |-> IF index \notin ayeVoters + THEN state[index] + ELSE IF index = i THEN Leader ELSE Follower] + /\ currentTerm' = [index \in Server |-> IF index \in (ayeVoters \union {i}) + THEN currentTerm[i] + 1 + ELSE currentTerm[index]] + /\ UNCHANGED <<logVars, configs>> + +\* ACTION +\* Leader i receives a client request to add v to the log. +ClientWrite(i) == + /\ state[i] = Leader + /\ LET entry == [term |-> currentTerm[i], configVersion |-> GetConfigVersion(i)] + newLog == Append(log[i], entry) + IN log' = [log EXCEPT ![i] = newLog] + /\ UNCHANGED <<serverVars, committedEntries, configs>> + +\* ACTION +\* Commit the latest log entry on a primary. +AdvanceCommitPoint == + \E leader \in Server : \E acknowledgers \in SUBSET Server : + /\ state[leader] = Leader + /\ acknowledgers \subseteq Agree(leader, Len(log[leader])) + /\ acknowledgers \in Quorum(leader) + \* If we comment out the following line, a replicated log entry from old primary will voilate the safety. + \* [ P (2), S (), S ()] + \* [ S (2), S (), P (3)] + \* [ S (2), S (2), P (3)] !!! the log from term 2 shouldn't be considered as committed. + /\ LogTerm(leader, Len(log[leader])) = currentTerm[leader] + \* If an acknowledger has a higher term, the leader would step down. + /\ \A j \in acknowledgers : currentTerm[j] <= currentTerm[leader] + /\ committedEntries' = committedEntries \union {[term |-> LastTerm(log[leader]), index |-> Len(log[leader])]} + /\ UNCHANGED <<serverVars, log, configs>> + +UpdateTermThroughHeartbeat(i, j) == + /\ j \in ServerViewOn(i) \* j is in the config of i. + /\ currentTerm[j] > currentTerm[i] + /\ currentTerm' = [currentTerm EXCEPT ![i] = currentTerm[j]] + /\ state' = [state EXCEPT ![i] = IF ~(state[i] = Leader) THEN state[i] ELSE Follower] + /\ UNCHANGED <<logVars, configs>> + +Reconfig(i, newConfig) == + /\ state[i] = Leader + /\ i \in newConfig + \* Only support single node addition/removal. + /\ Cardinality(ServerViewOn(i) \ newConfig) + Cardinality(newConfig \ ServerViewOn(i)) <= 1 + \* The config entry must be committed. + /\ LET configEntry == GetConfigEntry(i, GetConfigVersion(i)) + IN [term |-> log[i][configEntry].term, index |-> configEntry] \in committedEntries + \* The primary must have committed an entry in its current term. + /\ \E entry \in committedEntries : entry.term = currentTerm[i] + /\ configs' = Append(configs, newConfig) + /\ LET entry == [term |-> currentTerm[i], configVersion |-> Len(configs) + 1] + newLog == Append(log[i], entry) + IN log' = [log EXCEPT ![i] = newLog] + /\ UNCHANGED <<serverVars, committedEntries>> + +---- +AppendOplogAction == + \E i,j \in Server : AppendOplog(i, j) + +RollbackOplogAction == + \E i,j \in Server : RollbackOplog(i, j) + +BecomePrimaryByMagicAction == + \E i \in Server : \E ayeVoters \in SUBSET(Server) : BecomePrimaryByMagic(i, ayeVoters) + +ClientWriteAction == + \E i \in Server : ClientWrite(i) + +UpdateTermThroughHeartbeatAction == + \E i,j \in Server : UpdateTermThroughHeartbeat(i, j) + +ReconfigAction == + \E i \in Server : \E newConfig \in SUBSET(Server) : Reconfig(i, newConfig) + +---- +\* Defines how the variables may transition. +Next == + \* --- Replication protocol + \/ AppendOplogAction + \/ RollbackOplogAction + \/ BecomePrimaryByMagicAction + \/ ClientWriteAction + \/ AdvanceCommitPoint + \/ ReconfigAction + \/ UpdateTermThroughHeartbeatAction + +Liveness == + /\ SF_vars(AppendOplogAction) + /\ SF_vars(RollbackOplogAction) + \* A new primary should eventually write one entry. + /\ WF_vars(\E i \in Server : LastTerm(log[i]) # currentTerm[i] /\ ClientWrite(i)) + \* /\ WF_vars(ClientWriteAction) + +\* The specification must start with the initial state and transition according +\* to Next. +Spec == Init /\ [][Next]_vars /\ Liveness + +\* RollbackCommitted and NeverRollbackCommitted are not actions. +\* They are used for verification. +RollbackCommitted(i) == + /\ [term |-> LastTerm(log[i]), index |-> Len(log[i])] \in committedEntries + /\ \E j \in Server: CanRollbackOplog(i, j) + +NeverRollbackCommitted == + \A i \in Server: ~RollbackCommitted(i) + +TwoPrimariesInSameTerm == + \E i, j \in Server : + /\ i # j + /\ currentTerm[i] = currentTerm[j] + /\ state[i] = Leader + /\ state[j] = Leader + +NoTwoPrimariesInSameTerm == ~TwoPrimariesInSameTerm + +=============================================================================== diff --git a/src/mongo/tla_plus/download-tlc.sh b/src/mongo/tla_plus/download-tlc.sh new file mode 100755 index 00000000000..b63944a7231 --- /dev/null +++ b/src/mongo/tla_plus/download-tlc.sh @@ -0,0 +1,6 @@ +#!/bin/sh + +# Downloads TLC, which is the model-checker for the TLA+ formal specifications in this directory. + +echo "Downloading tla2tools.jar" +curl -fLO https://github.com/tlaplus/tlaplus/releases/download/v1.7.0/tla2tools.jar diff --git a/src/mongo/tla_plus/model-check.sh b/src/mongo/tla_plus/model-check.sh new file mode 100755 index 00000000000..cc53169ca28 --- /dev/null +++ b/src/mongo/tla_plus/model-check.sh @@ -0,0 +1,40 @@ +#!/bin/sh + +# Execute TLC, the TLA+ model-checker, on a TLA+ specification and model config. Call like: +# +# ./model-check.sh RaftMongo +# +# Requires Java 11. You can set the JAVA_BINARY environment variable to the full path to java. + +if [ "$#" -ne 1 ]; then + echo "Usage: $0 SPEC_DIRECTORY" >&2 + exit 1 +fi +if ! [ -e "$1" ]; then + echo "Directory $1 not found" >&2 + exit 1 +fi +if ! [ -d "$1" ]; then + echo "$1 not a directory" >&2 + exit 1 +fi +if ! [ -f "tla2tools.jar" ]; then + echo "No tla2tools.jar, run download-tlc.sh first" + exit 1 +fi + +TLA_FILE="MC$1.tla" +if ! [ -f "$1/$TLA_FILE" ]; then + echo "$1/$TLA_FILE does not exist" >&2 + exit 1 +fi + +if [ -z "$JAVA_BINARY" ]; then + JAVA_BINARY=java +else + echo "Using java binary [$JAVA_BINARY]" +fi + +cd "$1" +# Defer liveness checks to the end with -lncheck, for speed. +"$JAVA_BINARY" -XX:+UseParallelGC -Dtlc2.tool.fp.FPSet.impl=tlc2.tool.fp.OffHeapDiskFPSet -Dutil.ExecutionStatisticsCollector.id=10f53a1c957c11ea94a033245b683b65 -cp ../tla2tools.jar tlc2.TLC -lncheck final -workers auto "$TLA_FILE" |