summaryrefslogtreecommitdiff
path: root/src/mongo/s/transaction_router.h
blob: 0a68361ebe4f920d72e62e53d00be4e065513ae9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
/**
 *    Copyright (C) 2018-present MongoDB, Inc.
 *
 *    This program is free software: you can redistribute it and/or modify
 *    it under the terms of the Server Side Public License, version 1,
 *    as published by MongoDB, Inc.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    Server Side Public License for more details.
 *
 *    You should have received a copy of the Server Side Public License
 *    along with this program. If not, see
 *    <http://www.mongodb.com/licensing/server-side-public-license>.
 *
 *    As a special exception, the copyright holders give permission to link the
 *    code of portions of this program with the OpenSSL library under certain
 *    conditions as described in each individual source file and distribute
 *    linked combinations including the program with the OpenSSL library. You
 *    must comply with the Server Side Public License in all respects for
 *    all of the code used other than as permitted herein. If you modify file(s)
 *    with this exception, you may extend this exception to your version of the
 *    file(s), but you are not obligated to do so. If you do not wish to do so,
 *    delete this exception statement from your version. If you delete this
 *    exception statement from all source files in the program, then also delete
 *    it in the license file.
 */

#pragma once

#include <boost/optional.hpp>

#include "mongo/db/api_parameters.h"
#include "mongo/db/commands/txn_cmds_gen.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/repl/read_concern_args.h"
#include "mongo/db/session/logical_session_id.h"
#include "mongo/db/session/session_catalog.h"
#include "mongo/db/shard_id.h"
#include "mongo/db/stats/single_transaction_stats.h"
#include "mongo/s/async_requests_sender.h"
#include "mongo/s/client/shard.h"
#include "mongo/util/string_map.h"

namespace mongo {

/**
 * Keeps track of the transaction state. A session is in use when it is being used by a request.
 */
class TransactionRouter {
    struct PrivateState;
    struct ObservableState;

public:
    TransactionRouter();
    TransactionRouter(const TransactionRouter&) = delete;
    TransactionRouter& operator=(const TransactionRouter&) = delete;
    TransactionRouter(TransactionRouter&&) = delete;
    TransactionRouter& operator=(TransactionRouter&&) = delete;
    ~TransactionRouter();

    // The type of commit initiated for this transaction.
    enum class CommitType {
        kNotInitiated,
        kNoShards,
        kSingleShard,
        kSingleWriteShard,
        kReadOnly,
        kTwoPhaseCommit,
        kRecoverWithToken,
    };

    // The reason why TransactionRouter::Router::stash() is called.
    enum class StashReason { kDone, kYield };

    // The default value to use as the statement id of the first command in the transaction if none
    // was sent.
    static const StmtId kDefaultFirstStmtId = 0;

    /**
     * Represents the options for a transaction that are shared across all participants. These
     * cannot be changed without restarting the transactions that may have already been begun on
     * every participant, i.e. clearing the current participant list.
     */
    struct SharedTransactionOptions {
        // Set for all distributed transactions.
        TxnNumberAndRetryCounter txnNumberAndRetryCounter;
        APIParameters apiParameters;
        repl::ReadConcernArgs readConcernArgs;

        // Only set for transactions with snapshot level read concern.
        boost::optional<LogicalTime> atClusterTime;

        bool isInternalTransactionForRetryableWrite;
    };

    /**
     * Represents a shard participant in a distributed transaction. Lives only for the duration of
     * the transaction that created it.
     */
    struct Participant {
        enum class ReadOnly { kUnset, kReadOnly, kNotReadOnly };

        Participant(bool isCoordinator,
                    StmtId stmtIdCreatedAt,
                    ReadOnly inReadOnly,
                    SharedTransactionOptions sharedOptions);

        /**
         * Attaches necessary fields if this is participating in a multi statement transaction.
         */
        BSONObj attachTxnFieldsIfNeeded(BSONObj cmd, bool isFirstStatementInThisParticipant) const;

        // True if the participant has been chosen as the coordinator for its transaction
        const bool isCoordinator{false};

        // Is updated to kReadOnly or kNotReadOnly based on the readOnly field in the participant's
        // responses to statements.
        const ReadOnly readOnly{ReadOnly::kUnset};

        // Returns the shared transaction options this participant was created with
        const SharedTransactionOptions sharedOptions;

        // The highest statement id of the request during which this participant was created.
        const StmtId stmtIdCreatedAt;
    };

    // Container for timing stats for the current transaction. Includes helpers for calculating some
    // metrics like transaction duration.
    struct TimingStats {
        /**
         * Returns the duration of the transaction. The transaction start time must have been set
         * before this can be called.
         */
        Microseconds getDuration(TickSource* tickSource, TickSource::Tick curTicks) const;

        /**
         * Returns the duration of commit. The commit start time must have been set before this can
         * be called.
         */
        Microseconds getCommitDuration(TickSource* tickSource, TickSource::Tick curTicks) const;

        /**
         * Returns the total active time of the transaction, given the current time value. A
         * transaction is active when there is a running operation that is part of the transaction.
         */
        Microseconds getTimeActiveMicros(TickSource* tickSource, TickSource::Tick curTicks) const;

        /**
         * Returns the total inactive time of the transaction, given the current time value. A
         * transaction is inactive when it is idly waiting for a new operation to occur.
         */
        Microseconds getTimeInactiveMicros(TickSource* tickSource, TickSource::Tick curTicks) const;

        // The start time of the transaction in millisecond resolution. Used only for diagnostics
        // reporting.
        Date_t startWallClockTime;

        // The start time of the transaction. Note that tick values should only ever be used to
        // measure distance from other tick values, not for reporting absolute wall clock time. A
        // value of zero means the transaction hasn't started yet.
        TickSource::Tick startTime{0};

        // The start time of the transaction commit in millisecond resolution. Used only for
        // diagnostics reporting.
        Date_t commitStartWallClockTime;

        // When commit was started. A value of zero means the commit hasn't started yet.
        TickSource::Tick commitStartTime{0};

        // The end time of the transaction. A value of zero means the transaction hasn't ended yet.
        TickSource::Tick endTime{0};

        // The total amount of active time spent by the transaction.
        Microseconds timeActiveMicros = Microseconds{0};

        // The time at which the transaction was last marked as active. The transaction is
        // considered active if this value is not equal to 0.
        TickSource::Tick lastTimeActiveStart{0};
    };

    enum class TransactionActions { kStart, kContinue, kCommit };

    // Reason a transaction terminated.
    enum class TerminationCause {
        kCommitted,
        kAborted,
    };

    /**
     * Helper class responsible for updating per transaction and router wide transaction metrics on
     * certain transaction events.
     */
    class MetricsTracker {
    public:
        MetricsTracker(ServiceContext* service) : _service(service) {}
        MetricsTracker(const MetricsTracker&) = delete;
        MetricsTracker& operator=(const MetricsTracker&) = delete;
        MetricsTracker(MetricsTracker&&) = delete;
        MetricsTracker& operator=(MetricsTracker&&) = delete;
        ~MetricsTracker();

        bool isTrackingOver() const {
            return timingStats.endTime != 0;
        }

        bool hasStarted() const {
            return timingStats.startTime != 0;
        }

        bool isActive() const {
            return timingStats.lastTimeActiveStart != 0;
        }

        bool commitHasStarted() const {
            return timingStats.commitStartTime != 0;
        }

        const auto& getTimingStats() const {
            return timingStats;
        }

        /**
         * Marks the transaction as active and sets the start of the transaction's active time and
         * overall start time the first time it is called.
         *
         * This method is a no-op if the transaction is not currently inactive or has already ended.
         */
        void trySetActive(TickSource* tickSource, TickSource::Tick curTicks);

        /**
         * Marks the transaction as inactive, sets the total active time of the transaction, and
         * updates relevant server status counters.
         *
         * This method is a no-op if the transaction is not currently active or has already ended.
         */
        void trySetInactive(TickSource* tickSource, TickSource::Tick curTicks);

        /**
         * Marks the transaction as having begun commit, updating relevent stats. Assumes the
         * transaction is currently active.
         */
        void startCommit(TickSource* tickSource,
                         TickSource::Tick curTicks,
                         TransactionRouter::CommitType commitType,
                         std::size_t numParticipantsAtCommit);

        /**
         * Marks the transaction as over, updating stats based on the termination cause, which is
         * either commit or abort.
         */
        void endTransaction(TickSource* tickSource,
                            TickSource::Tick curTicks,
                            TransactionRouter::TerminationCause terminationCause,
                            TransactionRouter::CommitType commitType,
                            StringData abortCause);

    private:
        // Pointer to the service context used to get the tick source and router wide transaction
        // metrics decorations.
        ServiceContext* const _service;

        // Stats used for calculating durations for the active transaction.
        TransactionRouter::TimingStats timingStats;
    };

    /**
     * Encapsulates the logic around selecting a global read timestamp for a sharded transaction at
     * snapshot level read concern.
     *
     * The first command in a transaction to target at least one shard must select a cluster time
     * timestamp before targeting, but may change the timestamp before contacting any shards to
     * allow optimizing the timestamp based on the targeted shards. If the first command encounters
     * a retryable error, e.g. "retargeting needed" or SnapshotTooOld, the retry may also select a
     * new timestamp. Once the first command has successfully completed, the timestamp cannot be
     * changed.
     */
    class AtClusterTime {
    public:
        /**
         * Cannot be called until a timestamp has been set.
         */
        LogicalTime getTime() const;

        /**
         * Returns true if the _atClusterTime has been changed from the default uninitialized value.
         */
        bool timeHasBeenSet() const;

        /**
         * Sets the timestamp and remembers the statement id of the command that set it.
         */
        void setTime(LogicalTime atClusterTime, StmtId currentStmtId);

        /**
         * True if the timestamp can be changed by a command running at the given statement id.
         */
        bool canChange(StmtId currentStmtId) const;

    private:
        boost::optional<StmtId> _stmtIdSelectedAt;
        LogicalTime _atClusterTime;
    };

    /**
     * Class used by observers to examine the state of a TransactionRouter.
     */
    class Observer {
    public:
        explicit Observer(const ObservableSession& session);

        /**
         * Report the current state of an session. The sessionIsActive boolean indicates whether
         * the session and transaction are currently active.
         *
         * The Client lock for the given OperationContext must be held when calling this method in
         * the case where sessionIsActive is true.
         */
        BSONObj reportState(OperationContext* opCtx, bool sessionIsActive) const;
        void reportState(OperationContext* opCtx,
                         BSONObjBuilder* builder,
                         bool sessionIsActive) const;

        /**
         * Returns if the router has received at least one request for a transaction.
         */
        auto isInitialized() const {
            return o().txnNumberAndRetryCounter.getTxnNumber() != kUninitializedTxnNumber;
        }

        /**
         * Returns if this TransactionRouter instance can be reaped. Always true unless an operation
         * has yielded the router and has not unyielded yet. We cannot reap the instance in that
         * case or the unyield would check out a different TransactionRouter than it yielded.
         */
        auto canBeReaped() const {
            return o().activeYields == 0;
        }

    protected:
        explicit Observer(TransactionRouter* tr) : _tr(tr) {}

        const TransactionRouter::ObservableState& o() const {
            return _tr->_o;
        }

        // Reports the current state of the session using the provided builder.
        void _reportState(OperationContext* opCtx,
                          BSONObjBuilder* builder,
                          bool sessionIsActive) const;

        // Reports the 'transaction' state of this transaction for currentOp using the provided
        // builder.
        void _reportTransactionState(OperationContext* opCtx, BSONObjBuilder* builder) const;

        // Returns true if the atClusterTime has been changed from the default uninitialized value.
        bool _atClusterTimeHasBeenSet() const;

        // Shortcut to obtain the id of the session under which this transaction router runs
        const LogicalSessionId& _sessionId() const;

        TransactionRouter* _tr;
    };  // class Observer

    /**
     * Class used by a thread that has checked out the TransactionRouter's session to observe
     * and modify the transaction router.
     */
    class Router : public Observer {
    public:
        explicit Router(OperationContext* opCtx);

        explicit operator bool() const {
            return _tr;
        }

        /**
         * Starts a fresh transaction in this session or continue an existing one. Also cleans up
         * the previous transaction state.
         */
        void beginOrContinueTxn(OperationContext* opCtx,
                                TxnNumber txnNumber,
                                TransactionActions action);

        /**
         * Updates transaction diagnostics and, if necessary, the number of active yielders when the
         * transaction's session is checked in.
         */
        void stash(OperationContext* opCtx, StashReason reason);

        /**
         * Validates transaction state is still compatible after a yield.
         */
        void unstash(OperationContext* opCtx);

        /**
         * Attaches the required transaction related fields for a request to be sent to the given
         * shard.
         *
         * Calling this method has the following side effects:
         * 1. Potentially selecting a coordinator.
         * 2. Adding the shard to the list of participants.
         * 3. Also append fields for first statements (ex. startTransaction, readConcern)
         *    if the shard was newly added to the list of participants.
         */
        BSONObj attachTxnFieldsIfNeeded(OperationContext* opCtx,
                                        const ShardId& shardId,
                                        const BSONObj& cmdObj);

        /**
         * Processes the transaction metadata in the response from the participant if the response
         * indicates the operation succeeded.
         */
        void processParticipantResponse(OperationContext* opCtx,
                                        const ShardId& shardId,
                                        const BSONObj& responseObj);

        /**
         * Returns true if the current transaction can retry on a stale version error from a
         * contacted shard. This is always true except for an error received by a write that is not
         * the first overall statement in the sharded transaction. This is because the entire
         * command will be retried, and shards that were not stale and are targeted again may
         * incorrectly execute the command a second time.
         *
         * Note: Even if this method returns true, the retry attempt may still fail, e.g. if one of
         * the shards that returned a stale version error was involved in a previously completed a
         * statement for this transaction.
         *
         * TODO SERVER-37207: Change batch writes to retry only the failed writes in a batch, to
         * allow retrying writes beyond the first overall statement.
         */
        bool canContinueOnStaleShardOrDbError(StringData cmdName, const Status& status) const;

        /**
         * Updates the transaction state to allow for a retry of the current command on a stale
         * version error. This includes sending abortTransaction to all cleared participants. Will
         * throw if the transaction cannot be continued.
         */
        void onStaleShardOrDbError(OperationContext* opCtx,
                                   StringData cmdName,
                                   const Status& status);

        /**
         * Returns true if the current transaction can retry on a snapshot error. This is only true
         * on the first command recevied for a transaction.
         */
        bool canContinueOnSnapshotError() const;

        /**
         * Resets the transaction state to allow for a retry attempt. This includes clearing all
         * participants, clearing the coordinator, resetting the global read timestamp, and sending
         * abortTransaction to all cleared participants. Will throw if the transaction cannot be
         * continued.
         */
        void onSnapshotError(OperationContext* opCtx, const Status& status);

        /**
         * Updates the transaction tracking state to allow for a retry attempt on a view resolution
         * error. This includes sending abortTransaction to all cleared participants.
         */
        void onViewResolutionError(OperationContext* opCtx, const NamespaceString& nss);

        /**
         * Returns true if the associated transaction is running at snapshot level read concern.
         */
        bool mustUseAtClusterTime() const;

        /**
         * Returns the read timestamp for this transaction. Callers must verify that the read
         * timestamp has been selected for this transaction before calling this function.
         */
        LogicalTime getSelectedAtClusterTime() const;

        /**
         * Sets the atClusterTime for the current transaction to the latest time in the router's
         * logical clock. Does nothing if the transaction does not have snapshot read concern or an
         * atClusterTime has already been selected and cannot be changed.
         */
        void setDefaultAtClusterTime(OperationContext* opCtx);

        /**
         * If a coordinator has been selected for the current transaction, returns its id.
         */
        const boost::optional<ShardId>& getCoordinatorId() const;

        /**
         * If a recovery shard has been selected for the current transaction, returns its id.
         */
        const boost::optional<ShardId>& getRecoveryShardId() const;

        /**
         * Commits the transaction.
         *
         * For transactions that only did reads or only wrote to one shard, sends commit directly to
         * the participants and returns the first error response or the last (success) response.
         *
         * For transactions that performed writes to multiple shards, hands off the participant list
         * to the coordinator to do two-phase commit, and returns the coordinator's response.
         */
        BSONObj commitTransaction(OperationContext* opCtx,
                                  const boost::optional<TxnRecoveryToken>& recoveryToken);

        /**
         * Sends abort to all participants.
         *
         * Returns the first error response or the last (success) response.
         */
        BSONObj abortTransaction(OperationContext* opCtx);

        /**
         * Sends abort to all shards in the current participant list. Will retry on retryable
         * errors, but ignores the responses from each shard.
         */
        void implicitlyAbortTransaction(OperationContext* opCtx, const Status& status);

        /**
         * If a coordinator has been selected for this transaction already, constructs a recovery
         * token, which can be used to resume commit or abort of the transaction from a different
         * router.
         */
        void appendRecoveryToken(BSONObjBuilder* builder) const;

        /**
         * Returns a string with the active transaction's transaction number and logical session id
         * (i.e. the transaction id).
         */
        std::string txnIdToString() const;

        /**
         * Returns the participant for this transaction or nullptr if the specified shard is not
         * participant of this transaction.
         */
        const Participant* getParticipant(const ShardId& shard);

        /**
         * Returns the statement id of the latest received command for this transaction.
         */
        StmtId getLatestStmtId() const {
            return p().latestStmtId;
        }

        /**
         * Returns a copy of the timing stats of the transaction router's active transaction.
         */
        const auto& getTimingStats_forTest() const {
            invariant(o().metricsTracker);
            return o().metricsTracker->getTimingStats();
        }

        /**
         * Returns if the router is not currently tracking an active transaction.
         */
        bool isTrackingOver() {
            if (o().metricsTracker)
                return o().metricsTracker->isTrackingOver();
            return true;
        }

    private:
        /**
         * Resets the router's state. Used when the router sees a new transaction for the first
         * time. This is required because we don't create a new router object for each transaction,
         * but instead reuse the same object across different transactions.
         */
        void _resetRouterState(OperationContext* opCtx,
                               const TxnNumberAndRetryCounter& txnNumberAndRetryCounter);

        /**
         * Calls _resetRouterState and then resets the read concern and the cluster time of the
         * timestamp that all participant shards in the current transaction with snapshot level read
         * concern must read from.
         */
        void _resetRouterStateForStartTransaction(
            OperationContext* opCtx, const TxnNumberAndRetryCounter& txnNumberAndRetryCounter);

        /**
         * Continues or restarts the currently active transaction.
         */
        void _continueTxn(OperationContext* opCtx,
                          TxnNumberAndRetryCounter txnNumberAndRetryCounter,
                          TransactionActions action);

        /**
         * Starts a new transaction or continues a transaction started by a different router to
         * recover the commit decision.
         */
        void _beginTxn(OperationContext* opCtx,
                       TxnNumberAndRetryCounter txnNumberAndRetryCounter,
                       TransactionActions action);

        /**
         * Internal method for committing a transaction. Should only throw on failure to send
         * commit.
         */
        BSONObj _commitTransaction(OperationContext* opCtx,
                                   const boost::optional<TxnRecoveryToken>& recoveryToken);

        /**
         * Retrieves the transaction's outcome from the shard specified in the recovery token.
         */
        BSONObj _commitWithRecoveryToken(OperationContext* opCtx,
                                         const TxnRecoveryToken& recoveryToken);

        /**
         * Hands off coordinating a two-phase commit across all participants to the coordinator
         * shard.
         */
        BSONObj _handOffCommitToCoordinator(OperationContext* opCtx);

        /**
         * Sets the given logical time as the atClusterTime for the transaction to be the greater of
         * the given time and the user's afterClusterTime, if one was provided.
         */
        void _setAtClusterTime(OperationContext* opCtx,
                               const boost::optional<LogicalTime>& afterClusterTime,
                               LogicalTime candidateTime);

        /**
         * Throws NoSuchTransaction if the response from abortTransaction failed with a code other
         * than NoSuchTransaction. Does not check for write concern errors.
         */
        void _assertAbortStatusIsOkOrNoSuchTransaction(
            const AsyncRequestsSender::Response& response) const;

        /**
         * If the transaction's read concern level is snapshot, asserts the participant's
         * atClusterTime matches the transaction's.
         */
        void _verifyParticipantAtClusterTime(const Participant& participant);

        /**
         * Removes all participants created during the current statement from the participant list
         * and sends abortTransaction to each if there is more than one participant and the status
         * is not stale . Waits for all responses before returning.
         */
        void _clearPendingParticipants(OperationContext* opCtx, boost::optional<Status> optStatus);

        /**
         * Creates a new participant for the shard.
         */
        TransactionRouter::Participant& _createParticipant(OperationContext* opCtx,
                                                           const ShardId& shard);

        /**
         * Sets the new readOnly value for the current participant on the shard.
         */
        void _setReadOnlyForParticipant(OperationContext* opCtx,
                                        const ShardId& shard,
                                        Participant::ReadOnly readOnly);

        /**
         * Updates relevant metrics when the router receives an explicit abort from the client.
         */
        void _onExplicitAbort(OperationContext* opCtx);

        /**
         * Updates relevant metrics when the router begins an implicit abort after an error.
         */
        void _onImplicitAbort(OperationContext* opCtx, const Status& status);

        /**
         * Updates relevant metrics when a transaction is about to begin commit.
         */
        void _onStartCommit(WithLock wl, OperationContext* opCtx);

        /**
         * Updates relevant metrics when a transaction receives a successful response for commit.
         */
        void _onSuccessfulCommit(OperationContext* opCtx);

        /**
         * Updates relevant metrics when commit receives a response with a non-retryable command
         * error per the retryable writes specification.
         */
        void _onNonRetryableCommitError(OperationContext* opCtx, Status commitStatus);

        /**
         * Updates relevent metrics when a transaction is continued.
         */
        void _onContinue(OperationContext* opCtx);

        /**
         * The first time this method is called it marks the transaction as over in the router's
         * diagnostics and will log transaction information if its duration is over the global
         * slowMS threshold or the transaction log componenet verbosity >= 1. Only meant to be
         * called when the router definitively knows the transaction's outcome, e.g. it should not
         * be invoked after a network error on commit.
         */
        void _endTransactionTrackingIfNecessary(OperationContext* opCtx,
                                                TerminationCause terminationCause);

        /**
         * Returns all participants created during the current statement.
         */
        std::vector<ShardId> _getPendingParticipants() const;

        /**
         * Prints slow transaction information to the log.
         */
        void _logSlowTransaction(OperationContext* opCtx, TerminationCause terminationCause) const;

        /**
         * Returns the LastClientInfo object.
         */
        const SingleTransactionStats::LastClientInfo& _getLastClientInfo() const;

        /**
         * Updates the LastClientInfo object with the given Client's information.
         */
        void _updateLastClientInfo(Client* client);

        /**
         * Returns true if a status contains a stale shard or db routing error and the transaction
         * is retryable
         */
        bool _errorAllowsRetryOnStaleShardOrDb(const Status& status) const;

        TransactionRouter::PrivateState& p() {
            return _tr->_p;
        }
        const TransactionRouter::PrivateState& p() const {
            return _tr->_p;
        }

        TransactionRouter::ObservableState& o(WithLock) {
            return _tr->_o;
        }
        using Observer::o;
    };  // class Router

    static Router get(OperationContext* opCtx) {
        return Router(opCtx);
    }

    static Observer get(const ObservableSession& osession) {
        return Observer(osession);
    }

private:
    /**
     * State in this struct may be read by methods of Observer or Router, and may be written by
     * methods of Router when they acquire the lock on the opCtx's Client. Access this inside
     * Observer and Router using the private o() method for reading and (Router only) the
     * o(WithLock) method for writing.
     */
    struct ObservableState {

        // Struct with fields txnNumber and txnRetryCounter.
        // If beginOrContinueTxn has been called, txnNumber and txnRetryCounter reflect
        // the router's currently active transaction. Otherwise, they are set to
        // kUninitializedTxnNumber and kUninitializedTxnRetryCounter by default.
        TxnNumberAndRetryCounter txnNumberAndRetryCounter{kUninitializedTxnNumber,
                                                          kUninitializedTxnRetryCounter};

        // Is updated at commit time to reflect which commit path was taken.
        CommitType commitType{CommitType::kNotInitiated};

        // Map of current participants of the current transaction.
        StringMap<Participant> participants;

        // The id of participant chosen as the two-phase commit coordinator. If, at commit time,
        // two-phase commit is required, the participant list is handed off to this shard. Is unset
        // until the transaction has targeted a participant, and is set to the first participant
        // targeted. Is reset if the first participant targeted returns a "needs retargeting" error.
        boost::optional<ShardId> coordinatorId;

        // The API parameters the current transaction was started with.
        APIParameters apiParameters;

        // The read concern the current transaction was started with.
        repl::ReadConcernArgs readConcernArgs;

        // The cluster time of the timestamp all participant shards in the current transaction with
        // snapshot level read concern must read from. Only set for transactions running with
        // snapshot level read concern.
        boost::optional<AtClusterTime> atClusterTime;

        // String representing the reason a transaction aborted. Either the string name of the error
        // code that led to an implicit abort or "abort" if the client sent abortTransaction.
        std::string abortCause;

        // Information about the last client to run a transaction operation on this transaction
        // router.
        SingleTransactionStats::LastClientInfo lastClientInfo;

        // Class responsible for updating per transaction and router wide transaction metrics on
        // certain transaction events. Unset until the transaction router has processed at least one
        // transaction command.
        boost::optional<MetricsTracker> metricsTracker;

        // How many operations that checked out the router's session have currently yielded it. The
        // transaction number cannot be changed until this returns to 0, otherwise we cannot
        // guarantee that unyielding the session cannot fail.
        int32_t activeYields{0};
    } _o;

    /**
     * State in this struct may be read and written by methods of the Router, only. It may
     * access the struct via the private p() accessor. No further locking is required in methods of
     * the Router.
     */
    struct PrivateState {
        // Indicates whether this is trying to recover a commitTransaction on the current
        // transaction.
        bool isRecoveringCommit{false};

        // The id of the recovery participant. Passed in the recoveryToken that is included on
        // responses to the client. Is unset until the transaction has done a write, and is set to
        // the first participant that reports having done a write. Is reset if that participant is
        // removed from the participant list because another participant targeted in the same
        // statement returned a "needs retargeting" error.
        boost::optional<ShardId> recoveryShardId;

        // The statement id of the latest received command for this transaction. For batch writes,
        // this will be the highest stmtId contained in the batch. Incremented by one if new
        // commands do not contain statement ids.
        StmtId latestStmtId{kDefaultFirstStmtId};

        // The statement id of the command that began this transaction. Defaults to zero if no
        // statement id was included in the first command.
        StmtId firstStmtId{kDefaultFirstStmtId};

        // Track whether commit or abort have been initiated.
        bool terminationInitiated{false};
    } _p;
};

}  // namespace mongo